Unable to configure CM 5.8 on AWS

Contributor

Using the following configuration file, it dies...

This is patterned off the example provided by Cloudera. It builds fine when the hdfs-master count is 1, but when I make it 2 to have more than a single NameNode, etc., it fails.

cloudera-manager {

    instance: ${instances.m42x} {
        tags {
            # add any additional tags as needed
            application: "Cloudera Manager 5"
        }
    }

    repository: "http://archive.cloudera.com/cm5/redhat/6/x86_64/cm/5.8/"
    repositoryKeyUrl: "http://archive.cloudera.com/cm5/redhat/6/x86_64/cm/RPM-GPG-KEY-cloudera"

    enableEnterpriseTrial: true
}

cluster {

    products {
        CDH: 5
    }

    configs {
        # HDFS fencing should be set to true for HA configurations
        HDFS {
            dfs_ha_fencing_methods: "shell(true)"
        }
    }

    services: [HDFS, ZOOKEEPER, HBASE, HIVE]

    hdfsmaster-1 {
        count: 2

        instance: ${instances.m42x} {
            tags {
                group: hdfsmaster-1
            }
        }

        roles {
            HDFS: [NAMENODE, JOURNALNODE, FAILOVERCONTROLLER, BALANCER]
            ZOOKEEPER: [SERVER]
            HBASE: [MASTER]
        }

        # NameNode nameservice, autofailover, and quorum journal name must be configured for high availability
        configs {
            HDFS {
                NAMENODE {
                    dfs_federation_namenode_nameservice: hanameservice
                    autofailover_enabled: true
                    dfs_namenode_quorum_journal_name: hanameservice
                }
            }
        }
    }

    hdfsmaster-2 {
        count: 1

        instance: ${instances.m42x} {
            tags {
                group: hdfsmaster-2
            }
        }

        roles {
            HDFS: [JOURNALNODE]
            ZOOKEEPER: [SERVER]
        }
    }

    master-1 {
        count: 1

        instance: ${instances.m42x} {
            tags {
                group: master
            }
        }

        roles {
            HBASE: [MASTER]
            ZOOKEEPER: [SERVER]
        }
    }

    workers {
        count: 3
        minCount: 1

        instance: ${instances.c34x} {

            # placementGroup: REPLACE-ME

            tags {
                group: worker
            }
        }

        roles {
            HDFS: [DATANODE]
            HBASE: [REGIONSERVER]
        }
    }

    gateways {
        count: 1

        instance: ${instances.t2l} {
            tags {
                group: gateway
            }
        }

        roles {
            HIVE: [GATEWAY]
        }
    }

    postCreateScripts: ["""#!/bin/sh

echo 'Hello World!'
exit 0
""",
    """#!/usr/bin/python

print 'Hello again!'
"""]

    preTerminateScripts: ["""#!/bin/sh

echo 'Goodbye World!'
exit 0
""",
    """#!/usr/bin/python

print 'Goodbye again!'
"""]

 

1 ACCEPTED SOLUTION

Contributor

Below is the script I got running, with the AWS credentials, network, security, etc. sanitized.

It shows how to:

  • attach multiple EBS volumes (2),
  • set delete-on-terminate so that they clean up, and
  • tag the volumes/resources.

 

 

################################################################################
#   begin
#

name: INSERT-NAME-OF-CLUSTER   # Ultimately should have comments on each line for understanding.

provider {
    type: aws
    region: us-west-2
    subnetId: INSERT AWS Subnet ID
    securityGroupsIds: INSERT SG-ID   # or create an IAM Profile
    instanceNamePrefix: INSERT-NAME-OF-CLUSTER

    rootVolumeSizeGB: 100
    rootVolumeType: gp2
    # associatePublicIpAddresses: true
    # spotBidUSDPerHr: 0.50
}

ssh {
    username: ec2-user # for RHEL image
    privateKey: keypair.pem # with an absolute path to the .pem file
}

 

instances {

    m42x {
        type: m4.2xlarge   # requires an HVM AMI
        image: INSERT AMI
        rootVolumeSizeGB: 100

        # iamProfileName: iam-profile-REPLACE-ME

        tags {
            owner: ${?USER}
            Project: "INSERT Project name"
            Vertical: "Insert Project vertical"
        }

        bootstrapScript: """#!/bin/sh
#
# This is an embedded bootstrap script that runs as root and can be used to customize
# the instances immediately after boot and before any other Cloudera Director action
export AWS_ACCESS_KEY_ID=INSERT AWS Access Key
export AWS_SECRET_ACCESS_KEY=INSERT AWS Secret Key
export INSTANCE_ID=$(curl http://instance-data/latest/meta-data/instance-id)
export AVAILABILITY_ZONE=$(curl http://instance-data/latest/meta-data/placement/availability-zone)

export VOL1_ID=$(aws ec2 create-volume --size 15 --region us-west-2 --availability-zone ${AVAILABILITY_ZONE} --volume-type gp2 --query "VolumeId" | tr -d '"')
export VOL2_ID=$(aws ec2 create-volume --size 15 --region us-west-2 --availability-zone ${AVAILABILITY_ZONE} --volume-type gp2 --query "VolumeId" | tr -d '"')

echo 'Sleeping 30'
sleep 30

echo 'Create additional EBS volumes'
aws ec2 attach-volume --region us-west-2 --volume-id ${VOL1_ID} --instance-id ${INSTANCE_ID} --device /dev/sdf
aws ec2 attach-volume --region us-west-2 --volume-id ${VOL2_ID} --instance-id ${INSTANCE_ID} --device /dev/sdg
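
# Note (added, not part of the original script): attach-volume returns as soon as the
# request is accepted, so the devices may not be visible yet when the mkfs commands
# below run. If that race is ever hit, a short poll here should cover it, e.g.:
#   until [ -b /dev/xvdf ] && [ -b /dev/xvdg ]; do sleep 5; done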

 

 

# Create filesystems on the devices
echo 'Create filesystems'
mkfs.ext4 /dev/xvdf
mkfs.ext4 /dev/xvdg

# Create directories for the mount points
mkdir /data1
mkdir /data2

# Add the mount points to /etc/fstab
echo "/dev/xvdf /data1 ext4 defaults 0 0" >> /etc/fstab
echo "/dev/xvdg /data2 ext4 defaults 0 0" >> /etc/fstab

sleep 10

# Mount all the devices
echo 'Mount drives'
mount -a

#
# modify attributes of new volumes to delete on terminate
echo 'Modify volume attributes'
aws ec2 modify-instance-attribute --instance-id ${INSTANCE_ID} --region us-west-2 --block-device-mappings "[{\"DeviceName\": \"/dev/sdf\",\"Ebs\":{\"DeleteOnTermination\":true}}]"
aws ec2 modify-instance-attribute --instance-id ${INSTANCE_ID} --region us-west-2 --block-device-mappings "[{\"DeviceName\": \"/dev/sdg\",\"Ebs\":{\"DeleteOnTermination\":true}}]"

#
# tag the volumes
echo 'Tag resources'
aws ec2 create-tags --region us-west-2 --resources ${INSTANCE_ID} --tags Key=Application,Value=BIGDATA Key=Vertical,Value=VerticalName Key=Project,Value="Project name"
aws ec2 create-tags --region us-west-2 --resources ${VOL1_ID} --tags Key=Application,Value=BIGDATA Key=Vertical,Value=VerticalName Key=Project,Value="Project name"
aws ec2 create-tags --region us-west-2 --resources ${VOL2_ID} --tags Key=Application,Value=BIGDATA Key=Vertical,Value=VerticalName Key=Project,Value="Project name"

# If the exit code is not zero Cloudera Director will automatically retry
echo 'This is the last line of the script'
exit 0
"""

        #
        # Flag indicating whether to normalize the instance. Not setting normalization here implies that your
        # bootstrap script will take care of normalization. This is an advanced configuration that will require
        # assistance from Cloudera support.
        #
        # Normalization includes:
        #   downloading and installing packages
        #   minimizing swappiness
        #   increasing the maximum number of open files
        #   mounting ephemeral disks
        #   resizing the root partition.
        #
        # Defaults to true
        #
        normalizeInstance: true
    }

 

    m44x {
        type: m4.4xlarge
        image: ami

        tags {
            owner: ${?USER}
        }
    }

    c34x {
        type: c3.4xlarge
        image: ami

        tags {
            owner: ${?USER}
        }
    }

    i2x {
        type: i2.xlarge
        image: ami

        tags {
            owner: ${?USER}
        }
    }

    i22x {
        type: i2.2xlarge
        image: ami

        tags {
            owner: ${?USER}
        }
    }

    r32x {
        type: r3.2xlarge
        image: ami

        tags {
            owner: ${?USER}
        }
    }

    t2l {   # only suitable as a gateway
        type: t2.large
        image: ami

        tags {
            owner: ${?USER}
        }
    }
}

 

cloudera-manager {

    instance: ${instances.m42x} {
        tags {
            # add any additional tags as needed
            Application: "Cloudera Manager 5"
            Project: "place project name here"
            Vertical: "place vertical name here"
        }
    }

    repository: "http://archive.cloudera.com/cm5/redhat/6/x86_64/cm/5.8/"
    repositoryKeyUrl: "http://archive.cloudera.com/cm5/redhat/6/x86_64/cm/RPM-GPG-KEY-cloudera"

    enableEnterpriseTrial: true
}

 

cluster {

    products {
        CDH: 5
    }

    configs {
        # HDFS fencing should be set to true for HA configurations
        HDFS {
            dfs_ha_fencing_methods: "shell(true)"
        }
    }

    services: [HDFS, ZOOKEEPER, HBASE]

    hdfsmaster-1 {
        count: 2

        instance: ${instances.m44x} {
            tags {
                group: hdfsmaster-1
            }
        }

        roles {
            HDFS: [NAMENODE, JOURNALNODE, FAILOVERCONTROLLER]
            ZOOKEEPER: [SERVER]
        }

        # NameNode nameservice, autofailover, and quorum journal name must be configured for high availability
        configs {
            HDFS {
                NAMENODE {
                    dfs_federation_namenode_nameservice: hanameservice
                    autofailover_enabled: true
                    dfs_namenode_quorum_journal_name: hanameservice
                }
            }
        }
    }

    hdfsmaster-2 {
        count: 1

        instance: ${instances.m44x} {
            tags {
                group: hdfsmaster-2
            }
        }

        roles {
            HDFS: [JOURNALNODE, BALANCER]
            ZOOKEEPER: [SERVER]
            HBASE: [MASTER]
        }
    }

    master-1 {
        count: 1

        instance: ${instances.m44x} {
            tags {
                group: master
            }
        }

        roles {
            HBASE: [MASTER]
            ZOOKEEPER: [SERVER]
        }
    }

    workers {
        count: 3
        minCount: 1

        instance: ${instances.m42x} {

            # placementGroup: REPLACE-ME

            tags {
                group: worker
            }
        }

        roles {
            HDFS: [DATANODE]
            HBASE: [REGIONSERVER]
        }
    }

    postCreateScripts: ["""#!/bin/sh

echo 'Hello World!'
exit 0
""",
    """#!/usr/bin/python

print 'Hello again!'
"""]

    preTerminateScripts: ["""#!/bin/sh

echo 'Goodbye World!'
exit 0
""",
    """#!/usr/bin/python

print 'Goodbye again!'
"""]
}
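
For reference, a minimal sketch of how a config file like this is typically launched with the Cloudera Director client. The file name cluster.aws.ha.conf is just a placeholder, and the exact sub-commands and flags can vary between Director releases, so double-check them against your client's help output:

# standalone (proof-of-concept) mode
cloudera-director bootstrap cluster.aws.ha.conf

# or against a running Director server (admin/admin are the server's default credentials)
cloudera-director bootstrap-remote cluster.aws.ha.conf --lp.remote.username=admin --lp.remote.password=admin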

 


5 REPLIES

Contributor

I used the same HA config file shown in this topic, and it created the instances correctly in my cluster, but Cloudera Director fails with a bootstrap error. Is there a sample bootstrap script we could add to the sample HA config file to populate the instances with the required roles/services?

 

 


Contributor

jnlkds:

 

Thanks, I'll try out the code you had pasted here and see if it works in my environment.

 

 

Contributor

So far, I have not been successful. I tried it in my AWS environment, and the script you provided needs a few adjustments, so I'm modifying it to match my AWS environment. I'll update later; it's a work in progress for now.

Contributor

Let me know.. as I know it works.. no worries!