Created 10-06-2016 06:31 PM
Hi,
I'm experimenting with Cloudbreak to set up an Ambari 2.4/HDP 2.5 cluster quickly. There are 3 blueprints out of the box, but none of them is for HA.
I followed this wiki page https://cwiki.apache.org/confluence/display/AMBARI/Blueprint+Support+for+HA+Clusters#BlueprintSuppor...
I can only make the NameNode HA work. Here is the working blueprint.
{
"Blueprints": {
"blueprint_name": "hdp-ha",
"stack_name": "HDP",
"stack_version": "2.5"
},
"host_groups": [
{
"name": "gateway",
"components": [
{
"name": "ZOOKEEPER_CLIENT"
},
{
"name": "KNOX_GATEWAY"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "TEZ_CLIENT"
},
{
"name": "SLIDER"
},
{
"name": "SQOOP"
},
{
"name": "HDFS_CLIENT"
},
{
"name": "YARN_CLIENT"
},
{
"name": "METRICS_COLLECTOR"
},
{
"name": "MAPREDUCE2_CLIENT"
}
],
"cardinality": "1"
},
{
"name": "master_3",
"components": [
{
"name": "ZOOKEEPER_SERVER"
},
{
"name": "ZKFC"
},
{
"name": "JOURNALNODE"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "NAMENODE"
}
],
"cardinality": "1"
},
{
"name": "slave_1",
"components": [
{
"name": "NODEMANAGER"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "DATANODE"
}
],
"cardinality": "3"
},
{
"name": "master_2",
"components": [
{
"name": "ZOOKEEPER_SERVER"
},
{
"name": "JOURNALNODE"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "RESOURCEMANAGER"
},
{
"name": "APP_TIMELINE_SERVER"
}
],
"cardinality": "1"
},
{
"name": "master_1",
"components": [
{
"name": "ZOOKEEPER_SERVER"
},
{
"name": "JOURNALNODE"
},
{
"name": "HISTORYSERVER"
},
{
"name": "NAMENODE"
},
{
"name": "ZKFC"
},
{
"name": "METRICS_MONITOR"
}
],
"cardinality": "1"
}
],
"configurations": [
{
"core-site": {
"properties": {
"fs.defaultFS": "hdfs://mycluster",
"ha.zookeeper.quorum": "%HOSTGROUP::master_1%:2181,%HOSTGROUP::master_2%:2181,%HOSTGROUP::master_3%:2181"
}
}
},
{
"hdfs-site": {
"properties": {
"dfs.client.failover.proxy.provider.mycluster": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
"dfs.ha.automatic-failover.enabled": "true",
"dfs.ha.fencing.methods": "shell(/bin/true)",
"dfs.ha.namenodes.mycluster": "nn1,nn2",
"dfs.namenode.http-address": "%HOSTGROUP::master_1%:50070",
"dfs.namenode.http-address.mycluster.nn1": "%HOSTGROUP::master_1%:50070",
"dfs.namenode.http-address.mycluster.nn2": "%HOSTGROUP::master_3%:50070",
"dfs.namenode.https-address": "%HOSTGROUP::master_1%:50470",
"dfs.namenode.https-address.mycluster.nn1": "%HOSTGROUP::master_1%:50470",
"dfs.namenode.https-address.mycluster.nn2": "%HOSTGROUP::master_3%:50470",
"dfs.namenode.rpc-address.mycluster.nn1": "%HOSTGROUP::master_1%:8020",
"dfs.namenode.rpc-address.mycluster.nn2": "%HOSTGROUP::master_3%:8020",
"dfs.namenode.shared.edits.dir": "qjournal://%HOSTGROUP::master_1%:8485;%HOSTGROUP::master_2%:8485;%HOSTGROUP::master_3%:8485/mycluster",
"dfs.nameservices": "mycluster"
}
}
}
]
}
Enabling ResourceManager HA always fails with the error "java.lang.IllegalArgumentException: Unable to update configuration property 'yarn.resourcemanager.admin.address' with topology information. Component 'RESOURCEMANAGER' is mapped to an invalid number of hosts '2'"
The blueprint for NN+RM HA is:
{
"Blueprints": {
"blueprint_name": "hdp-ha",
"stack_name": "HDP",
"stack_version": "2.5"
},
"host_groups": [
{
"name": "gateway",
"components": [
{
"name": "ZOOKEEPER_CLIENT"
},
{
"name": "KNOX_GATEWAY"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "TEZ_CLIENT"
},
{
"name": "SLIDER"
},
{
"name": "SQOOP"
},
{
"name": "HDFS_CLIENT"
},
{
"name": "YARN_CLIENT"
},
{
"name": "METRICS_COLLECTOR"
},
{
"name": "MAPREDUCE2_CLIENT"
}
],
"cardinality": "1"
},
{
"name": "slave_1",
"components": [
{
"name": "NODEMANAGER"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "DATANODE"
}
],
"cardinality": "3"
},
{
"name": "master_1",
"components": [
{
"name": "ZOOKEEPER_SERVER"
},
{
"name": "HDFS_CLIENT"
},
{
"name": "YARN_CLIENT"
},
{
"name": "JOURNALNODE"
},
{
"name": "HISTORYSERVER"
},
{
"name": "NAMENODE"
},
{
"name": "ZKFC"
},
{
"name": "METRICS_MONITOR"
}
],
"cardinality": "1"
},
{
"name": "master_2",
"components": [
{
"name": "ZOOKEEPER_SERVER"
},
{
"name": "HDFS_CLIENT"
},
{
"name": "YARN_CLIENT"
},
{
"name": "JOURNALNODE"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "RESOURCEMANAGER"
},
{
"name": "APP_TIMELINE_SERVER"
}
],
"cardinality": "1"
},
{
"name": "master_3",
"components": [
{
"name": "ZOOKEEPER_SERVER"
},
{
"name": "HDFS_CLIENT"
},
{
"name": "YARN_CLIENT"
},
{
"name": "ZKFC"
},
{
"name": "JOURNALNODE"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "NAMENODE"
},
{
"name": "RESOURCEMANAGER"
}
],
"cardinality": "1"
}
],
"configurations": [
{
"core-site": {
"properties": {
"fs.defaultFS": "hdfs://mycluster",
"ha.zookeeper.quorum": "%HOSTGROUP::master_1%:2181,%HOSTGROUP::master_2%:2181,%HOSTGROUP::master_3%:2181",
"hadoop.proxyuser.yarn.hosts": "%HOSTGROUP::master_2%,%HOSTGROUP::master_3%"
}
}
},
{
"hdfs-site": {
"properties": {
"dfs.client.failover.proxy.provider.mycluster": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
"dfs.ha.automatic-failover.enabled": "true",
"dfs.ha.fencing.methods": "shell(/bin/true)",
"dfs.ha.namenodes.mycluster": "nn1,nn2",
"dfs.namenode.http-address": "%HOSTGROUP::master_1%:50070",
"dfs.namenode.http-address.mycluster.nn1": "%HOSTGROUP::master_1%:50070",
"dfs.namenode.http-address.mycluster.nn2": "%HOSTGROUP::master_3%:50070",
"dfs.namenode.https-address": "%HOSTGROUP::master_1%:50470",
"dfs.namenode.https-address.mycluster.nn1": "%HOSTGROUP::master_1%:50470",
"dfs.namenode.https-address.mycluster.nn2": "%HOSTGROUP::master_3%:50470",
"dfs.namenode.rpc-address.mycluster.nn1": "%HOSTGROUP::master_1%:8020",
"dfs.namenode.rpc-address.mycluster.nn2": "%HOSTGROUP::master_3%:8020",
"dfs.namenode.shared.edits.dir": "qjournal://%HOSTGROUP::master_1%:8485;%HOSTGROUP::master_2%:8485;%HOSTGROUP::master_3%:8485/mycluster",
"dfs.nameservices": "mycluster"
}
}
},
{
"yarn-site": {
"properties": {
"yarn.resourcemanager.ha.enabled": "true",
"yarn.resourcemanager.ha.rm-ids": "rm1,rm2",
"yarn.resourcemanager.hostname.rm1": "%HOSTGROUP::master_2%",
"yarn.resourcemanager.hostname.rm2": "%HOSTGROUP::master_3%",
"yarn.resourcemanager.webapp.address.rm1": "%HOSTGROUP::master_2%:8088",
"yarn.resourcemanager.webapp.address.rm2": "%HOSTGROUP::master_3%:8088",
"yarn.resourcemanager.webapp.https.address.rm1": "%HOSTGROUP::master_2%:8090",
"yarn.resourcemanager.webapp.https.address.rm2": "%HOSTGROUP::master_3%:8090",
"yarn.resourcemanager.recovery.enabled": "true",
"yarn.resourcemanager.store.class": "org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore",
"yarn.resourcemanager.zk-address": "%HOSTGROUP::master_1%:2181,%HOSTGROUP::master_2%:2181,%HOSTGROUP::master_3%:2181",
"yarn.client.failover-proxy-provider": "org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider",
"yarn.resourcemanager.cluster-id": "yarn-cluster",
"yarn.resourcemanager.ha.automatic-failover.zk-base-path": "/yarn-leader-election"
}
}
}
]
}
Has anyone had experience with an RM HA blueprint? Or is this a bug in Ambari 2.4?
Thanks in advance.
Wendell
Created 10-10-2016 10:02 PM
Hello @wbu, you have missed a few of the properties in `yarn-site`. Please replace it with the following snippet:
"properties": {
"hadoop.registry.rm.enabled": "false",
"hadoop.registry.zk.quorum": "%HOSTGROUP::master_3%:2181,%HOSTGROUP::master_2%:2181,%HOSTGROUP::master_1%:2181",
"yarn.log.server.url": "http://%HOSTGROUP::master_2%:19888/jobhistory/logs",
"yarn.resourcemanager.address": "%HOSTGROUP::master_2%:8050",
"yarn.resourcemanager.admin.address": "%HOSTGROUP::master_2%:8141",
"yarn.resourcemanager.cluster-id": "yarn-cluster",
"yarn.resourcemanager.ha.automatic-failover.zk-base-path": "/yarn-leader-election",
"yarn.resourcemanager.ha.enabled": "true",
"yarn.resourcemanager.ha.rm-ids": "rm1,rm2",
"yarn.resourcemanager.hostname": "%HOSTGROUP::master_2%",
"yarn.resourcemanager.recovery.enabled": "true",
"yarn.resourcemanager.resource-tracker.address": "%HOSTGROUP::master_2%:8025",
"yarn.resourcemanager.scheduler.address": "%HOSTGROUP::master_2%:8030",
"yarn.resourcemanager.store.class": "org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore",
"yarn.resourcemanager.webapp.address": "%HOSTGROUP::master_2%:8088",
"yarn.resourcemanager.webapp.https.address": "%HOSTGROUP::master_2%:8090",
"yarn.timeline-service.address": "%HOSTGROUP::master_2%:10200",
"yarn.timeline-service.webapp.address": "%HOSTGROUP::master_2%:8188",
"yarn.timeline-service.webapp.https.address": "%HOSTGROUP::master_2%:8190",
"yarn.resourcemanager.zk-address": "%HOSTGROUP::master_2%:2181,%HOSTGROUP::master_1%:2181,%HOSTGROUP::master_3%:2181",
"yarn.resourcemanager.hostname.rm1": "%HOSTGROUP::master_2%",
"yarn.resourcemanager.hostname.rm2": "%HOSTGROUP::master_3%"
}
Created 10-10-2016 10:02 PM
Hello @wbu, you have missed a few of the properties in `yarn-site`. Please replace it with the following snippet:
"properties": {
"hadoop.registry.rm.enabled": "false",
"hadoop.registry.zk.quorum": "%HOSTGROUP::master_3%:2181,%HOSTGROUP::master_2%:2181,%HOSTGROUP::master_1%:2181",
"yarn.log.server.url": "http://%HOSTGROUP::master_2%:19888/jobhistory/logs",
"yarn.resourcemanager.address": "%HOSTGROUP::master_2%:8050",
"yarn.resourcemanager.admin.address": "%HOSTGROUP::master_2%:8141",
"yarn.resourcemanager.cluster-id": "yarn-cluster",
"yarn.resourcemanager.ha.automatic-failover.zk-base-path": "/yarn-leader-election",
"yarn.resourcemanager.ha.enabled": "true",
"yarn.resourcemanager.ha.rm-ids": "rm1,rm2",
"yarn.resourcemanager.hostname": "%HOSTGROUP::master_2%",
"yarn.resourcemanager.recovery.enabled": "true",
"yarn.resourcemanager.resource-tracker.address": "%HOSTGROUP::master_2%:8025",
"yarn.resourcemanager.scheduler.address": "%HOSTGROUP::master_2%:8030",
"yarn.resourcemanager.store.class": "org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore",
"yarn.resourcemanager.webapp.address": "%HOSTGROUP::master_2%:8088",
"yarn.resourcemanager.webapp.https.address": "%HOSTGROUP::master_2%:8090",
"yarn.timeline-service.address": "%HOSTGROUP::master_2%:10200",
"yarn.timeline-service.webapp.address": "%HOSTGROUP::master_2%:8188",
"yarn.timeline-service.webapp.https.address": "%HOSTGROUP::master_2%:8190",
"yarn.resourcemanager.zk-address": "%HOSTGROUP::master_2%:2181,%HOSTGROUP::master_1%:2181,%HOSTGROUP::master_3%:2181",
"yarn.resourcemanager.hostname.rm1": "%HOSTGROUP::master_2%",
"yarn.resourcemanager.hostname.rm2": "%HOSTGROUP::master_3%"
}
Created 10-11-2016 02:00 PM
Thanks, @Bhavin Tandel
It works.
Created 11-16-2016 06:19 PM
I am trying to set up an NN+RM HA cluster with the blueprint shown below, but I am not able to launch it successfully; I get errors such as the NameNode and ResourceManager services failing to start. Please advise.
{
"Blueprints": {
"blueprint_name": "NN_RM_HA",
"stack_name": "HDP",
"stack_version": "2.5"
},
"host_groups": [
{
"name": "edge",
"components": [
{
"name": "ZOOKEEPER_CLIENT"
},
{
"name": "KNOX_GATEWAY"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "TEZ_CLIENT"
},
{
"name": "SLIDER"
},
{
"name": "SQOOP"
},
{
"name": "HDFS_CLIENT"
},
{
"name": "YARN_CLIENT"
},
{
"name": "METRICS_COLLECTOR"
},
{
"name": "MAPREDUCE2_CLIENT"
}
],
"cardinality": "1"
},
{
"name": "datanodes",
"components": [
{
"name": "NODEMANAGER"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "DATANODE"
}
],
"cardinality": "4"
},
{
"name": "mgmt_1",
"components": [
{
"name": "ZOOKEEPER_SERVER"
},
{
"name": "HDFS_CLIENT"
},
{
"name": "YARN_CLIENT"
},
{
"name": "JOURNALNODE"
},
{
"name": "HISTORYSERVER"
},
{
"name": "NAMENODE"
},
{
"name": "ZKFC"
},
{
"name": "METRICS_MONITOR"
}
],
"cardinality": "1"
},
{
"name": "mgmt_2",
"components": [
{
"name": "ZOOKEEPER_SERVER"
},
{
"name": "HDFS_CLIENT"
},
{
"name": "YARN_CLIENT"
},
{
"name": "JOURNALNODE"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "RESOURCEMANAGER"
},
{
"name": "APP_TIMELINE_SERVER"
}
],
"cardinality": "1"
},
{
"name": "mgmt_3",
"components": [
{
"name": "ZOOKEEPER_SERVER"
},
{
"name": "HDFS_CLIENT"
},
{
"name": "YARN_CLIENT"
},
{
"name": "ZKFC"
},
{
"name": "JOURNALNODE"
},
{
"name": "METRICS_MONITOR"
},
{
"name": "NAMENODE"
},
{
"name": "RESOURCEMANAGER"
}
],
"cardinality": "1"
}
],
"configurations": [
{
"core-site": {
"properties": {
"fs.defaultFS": "hdfs://mycluster",
"ha.zookeeper.quorum": "%HOSTGROUP::mgmt_1%:2181,%HOSTGROUP::mgmt_2%:2181,%HOSTGROUP::mgmt_3%:2181",
"hadoop.proxyuser.yarn.hosts": "%HOSTGROUP::mgmt_2%,%HOSTGROUP::mgmt_3%"
}
}
},
{
"hdfs-site": {
"properties": {
"dfs.client.failover.proxy.provider.mycluster": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
"dfs.ha.automatic-failover.enabled": "true",
"dfs.ha.fencing.methods": "shell(/bin/true)",
"dfs.ha.namenodes.mycluster": "nn1,nn2",
"dfs.namenode.http-address": "%HOSTGROUP::mgmt_1%:50070",
"dfs.namenode.http-address.mycluster.nn1": "%HOSTGROUP::mgmt_1%:50070",
"dfs.namenode.http-address.mycluster.nn2": "%HOSTGROUP::mgmt_3%:50070",
"dfs.namenode.https-address": "%HOSTGROUP::mgmt_1%:50470",
"dfs.namenode.https-address.mycluster.nn1": "%HOSTGROUP::mgmt_1%:50470",
"dfs.namenode.https-address.mycluster.nn2": "%HOSTGROUP::mgmt_3%:50470",
"dfs.namenode.rpc-address.mycluster.nn1": "%HOSTGROUP::mgmt_1%:8020",
"dfs.namenode.rpc-address.mycluster.nn2": "%HOSTGROUP::mgmt_3%:8020",
"dfs.namenode.shared.edits.dir": "qjournal://%HOSTGROUP::mgmt_1%:8485;%HOSTGROUP::mgmt_2%:8485;%HOSTGROUP::mgmt_3%:8485/mycluster",
"dfs.nameservices": "mycluster"
}
}
},
{
"yarn-site": {
"properties": {
"hadoop.registry.rm.enabled": "false",
"hadoop.registry.zk.quorum": "%HOSTGROUP::mgmt_3%:2181,%HOSTGROUP::mgmt_2%:2181,%HOSTGROUP::mgmt_1%:2181",
"yarn.log.server.url": "http://%HOSTGROUP::mgmt_2%:19888/jobhistory/logs",
"yarn.resourcemanager.address": "%HOSTGROUP::mgmt_2%:8050",
"yarn.resourcemanager.admin.address": "%HOSTGROUP::mgmt_2%:8141",
"yarn.resourcemanager.cluster-id": "yarn-cluster",
"yarn.resourcemanager.ha.automatic-failover.zk-base-path": "/yarn-leader-election",
"yarn.resourcemanager.ha.enabled": "true",
"yarn.resourcemanager.ha.rm-ids": "rm1,rm2",
"yarn.resourcemanager.hostname": "%HOSTGROUP::mgmt_2%",
"yarn.resourcemanager.recovery.enabled": "true",
"yarn.resourcemanager.resource-tracker.address": "%HOSTGROUP::mgmt_2%:8025",
"yarn.resourcemanager.scheduler.address": "%HOSTGROUP::mgmt_2%:8030",
"yarn.resourcemanager.store.class": "org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore",
"yarn.resourcemanager.webapp.address": "%HOSTGROUP::mgmt_2%:8088",
"yarn.resourcemanager.webapp.https.address": "%HOSTGROUP::mgmt_2%:8090",
"yarn.timeline-service.address": "%HOSTGROUP::mgmt_2%:10200",
"yarn.timeline-service.webapp.address": "%HOSTGROUP::mgmt_2%:8188",
"yarn.timeline-service.webapp.https.address": "%HOSTGROUP::mgmt_2%:8190",
"yarn.resourcemanager.zk-address": "%HOSTGROUP::mgmt_2%:2181,%HOSTGROUP::mgmt_1%:2181,%HOSTGROUP::mgmt_3%:2181",
"yarn.resourcemanager.hostname.rm1": "%HOSTGROUP::mgmt_2%",
"yarn.resourcemanager.hostname.rm2": "%HOSTGROUP::mgmt_3%"
}
}
}
]
}