<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: HDFS NameNode roles failing to start after host restarts in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/HDFS-NameNode-roles-failing-to-start-after-host-restarts/m-p/374295#M241946</link>
    <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/106116"&gt;@idodds&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Your Namenode is failing to connect to quorum of JN (2/3).&lt;/P&gt;&lt;P&gt;Could you check and share any errors/warn you are getting on the two remote JN hosts ?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you&amp;nbsp;&lt;/P&gt;&lt;P&gt;Parth Joshi&lt;/P&gt;</description>
    <pubDate>Fri, 21 Jul 2023 17:52:25 GMT</pubDate>
    <dc:creator>pajoshi</dc:creator>
    <dc:date>2023-07-21T17:52:25Z</dc:date>
    <item>
      <title>HDFS NameNode roles failing to start after host restarts</title>
      <link>https://community.cloudera.com/t5/Support-Questions/HDFS-NameNode-roles-failing-to-start-after-host-restarts/m-p/374282#M241936</link>
      <description>&lt;P&gt;Hello All,&lt;/P&gt;&lt;P&gt;I'm trouble-shooting the following issue with our Cloudera Nutch cluster and would appreciate any help the community can offer:&lt;/P&gt;&lt;P&gt;We have two NameNode roles and three JournalNode roles running, however both NameNode roles are failing to start and reporting the error below (IP addresses obfuscated).&amp;nbsp; This occurred following a restart of the underlying hosts.&lt;BR /&gt;Any recommendations for a recovery path from this error would be greatly appreciated.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;Error: recoverUnfinalizedSegments failed for required journal (JournalAndStream(mgr=QJM to [x.x.x.95:8485, x.x.x.86:8485, x.x.x.130:8485], stream=null))
org.apache.hadoop.hdfs.qjournal.client.QuorumException: Got too many exceptions to achieve quorum size 2/3. 1 successful responses:
x.x.x.130:8485: null [success]
2 exceptions thrown:
x.x.x.95:8485: tried to access method com.google.common.collect.Range.&amp;lt;init&amp;gt;(Lcom/google/common/collect/Cut;Lcom/google/common/collect/Cut;)V from class com.google.common.collect.Ranges
	at com.google.common.collect.Ranges.create(Ranges.java:76)
	at com.google.common.collect.Ranges.closed(Ranges.java:98)
	at org.apache.hadoop.hdfs.qjournal.server.Journal.txnRange(Journal.java:872)
	at org.apache.hadoop.hdfs.qjournal.server.Journal.acceptRecovery(Journal.java:806)
	at org.apache.hadoop.hdfs.qjournal.server.JournalNodeRpcServer.acceptRecovery(JournalNodeRpcServer.java:206)
	at org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolServerSideTranslatorPB.acceptRecovery(QJournalProtocolServerSideTranslatorPB.java:261)
	at org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos$QJournalProtocolService$2.callBlockingMethod(QJournalProtocolProtos.java:25435)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1060)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080)

x.x.x.86:8485: tried to access method com.google.common.collect.Range.&amp;lt;init&amp;gt;(Lcom/google/common/collect/Cut;Lcom/google/common/collect/Cut;)V from class com.google.common.collect.Ranges
	at com.google.common.collect.Ranges.create(Ranges.java:76)
	at com.google.common.collect.Ranges.closed(Ranges.java:98)
	at org.apache.hadoop.hdfs.qjournal.server.Journal.txnRange(Journal.java:872)
	at org.apache.hadoop.hdfs.qjournal.server.Journal.acceptRecovery(Journal.java:806)
	at org.apache.hadoop.hdfs.qjournal.server.JournalNodeRpcServer.acceptRecovery(JournalNodeRpcServer.java:206)
	at org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolServerSideTranslatorPB.acceptRecovery(QJournalProtocolServerSideTranslatorPB.java:261)
	at org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos$QJournalProtocolService$2.callBlockingMethod(QJournalProtocolProtos.java:25435)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1060)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080)

	at org.apache.hadoop.hdfs.qjournal.client.QuorumException.create(QuorumException.java:81)
	at org.apache.hadoop.hdfs.qjournal.client.QuorumCall.rethrowException(QuorumCall.java:223)
	at org.apache.hadoop.hdfs.qjournal.client.AsyncLoggerSet.waitForWriteQuorum(AsyncLoggerSet.java:142)
	at org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager.recoverUnclosedSegment(QuorumJournalManager.java:345)
	at org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager.recoverUnfinalizedSegments(QuorumJournalManager.java:455)
	at org.apache.hadoop.hdfs.server.namenode.JournalSet$8.apply(JournalSet.java:624)
	at org.apache.hadoop.hdfs.server.namenode.JournalSet.mapJournalsAndReportErrors(JournalSet.java:393)
	at org.apache.hadoop.hdfs.server.namenode.JournalSet.recoverUnfinalizedSegments(JournalSet.java:621)
	at org.apache.hadoop.hdfs.server.namenode.FSEditLog.recoverUnclosedStreams(FSEditLog.java:1408)
	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startActiveServices(FSNamesystem.java:1201)
	at org.apache.hadoop.hdfs.server.namenode.NameNode$NameNodeHAContext.startActiveServices(NameNode.java:1717)
	at org.apache.hadoop.hdfs.server.namenode.ha.ActiveState.enterState(ActiveState.java:61)
	at org.apache.hadoop.hdfs.server.namenode.ha.HAState.setStateInternal(HAState.java:64)
	at org.apache.hadoop.hdfs.server.namenode.ha.StandbyState.setState(StandbyState.java:49)
	at org.apache.hadoop.hdfs.server.namenode.NameNode.transitionToActive(NameNode.java:1590)
	at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.transitionToActive(NameNodeRpcServer.java:1351)
	at org.apache.hadoop.ha.protocolPB.HAServiceProtocolServerSideTranslatorPB.transitionToActive(HAServiceProtocolServerSideTranslatorPB.java:107)
	at org.apache.hadoop.ha.proto.HAServiceProtocolProtos$HAServiceProtocolService$2.callBlockingMethod(HAServiceProtocolProtos.java:4460)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1060)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jul 2023 11:49:00 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/HDFS-NameNode-roles-failing-to-start-after-host-restarts/m-p/374282#M241936</guid>
      <dc:creator>idodds</dc:creator>
      <dc:date>2023-07-21T11:49:00Z</dc:date>
    </item>
    <item>
      <title>Re: HDFS NameNode roles failing to start after host restarts</title>
      <link>https://community.cloudera.com/t5/Support-Questions/HDFS-NameNode-roles-failing-to-start-after-host-restarts/m-p/374294#M241945</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/106116"&gt;@idodds&lt;/a&gt;&amp;nbsp;Welcome to the Cloudera Community!&lt;BR /&gt;&lt;BR /&gt;To help you get the best possible solution, I have tagged our HDFS experts&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/80648"&gt;@blizano&lt;/a&gt;&amp;nbsp;and&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/47030"&gt;@pajoshi&lt;/a&gt;&amp;nbsp; who may be able to assist you further.&lt;BR /&gt;&lt;BR /&gt;Please keep us updated on your post, and we hope you find a satisfactory solution to your query.&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jul 2023 17:25:07 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/HDFS-NameNode-roles-failing-to-start-after-host-restarts/m-p/374294#M241945</guid>
      <dc:creator>DianaTorres</dc:creator>
      <dc:date>2023-07-21T17:25:07Z</dc:date>
    </item>
    <item>
      <title>Re: HDFS NameNode roles failing to start after host restarts</title>
      <link>https://community.cloudera.com/t5/Support-Questions/HDFS-NameNode-roles-failing-to-start-after-host-restarts/m-p/374295#M241946</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/106116"&gt;@idodds&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Your Namenode is failing to connect to quorum of JN (2/3).&lt;/P&gt;&lt;P&gt;Could you check and share any errors/warn you are getting on the two remote JN hosts ?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you&amp;nbsp;&lt;/P&gt;&lt;P&gt;Parth Joshi&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jul 2023 17:52:25 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/HDFS-NameNode-roles-failing-to-start-after-host-restarts/m-p/374295#M241946</guid>
      <dc:creator>pajoshi</dc:creator>
      <dc:date>2023-07-21T17:52:25Z</dc:date>
    </item>
    <item>
      <title>Re: HDFS NameNode roles failing to start after host restarts</title>
      <link>https://community.cloudera.com/t5/Support-Questions/HDFS-NameNode-roles-failing-to-start-after-host-restarts/m-p/374296#M241947</link>
      <description>&lt;P&gt;Hi. Thank you for responding. Replying on behalf of &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/106116"&gt;@idodds&lt;/a&gt;.&amp;nbsp;Both of the nodes report same/similar errors as below:&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;Jul 21, 8:33:30.310 AM	INFO	org.apache.hadoop.hdfs.qjournal.server.Journal	
Updating lastPromisedEpoch from 172 to 173 for client /x.y.z.30
Jul 21, 8:33:30.312 AM	INFO	org.apache.hadoop.hdfs.qjournal.server.Journal	
Scanning storage FileJournalManager(root=/dfs/journal-edits/nutch-nameservice1)
Jul 21, 8:33:30.329 AM	INFO	org.apache.hadoop.hdfs.qjournal.server.Journal	
Latest log is EditLogFile(file=/dfs/journal-edits/nutch-nameservice1/current/edits_inprogress_0000000000256541217,first=0000000000256541217,last=0000000000256541842,inProgress=true,hasCorruptHeader=false)
Jul 21, 8:33:30.339 AM	INFO	org.apache.hadoop.hdfs.qjournal.server.Journal	
getSegmentInfo(256541217): EditLogFile(file=/dfs/journal-edits/nutch-nameservice1/current/edits_inprogress_0000000000256541217,first=0000000000256541217,last=0000000000256541842,inProgress=true,hasCorruptHeader=false) -&amp;gt; startTxId: 256541217 endTxId: 256541842 isInProgress: true
Jul 21, 8:33:30.340 AM	INFO	org.apache.hadoop.hdfs.qjournal.server.Journal	
Prepared recovery for segment 256541217: segmentState { startTxId: 256541217 endTxId: 256541842 isInProgress: true } lastWriterEpoch: 38 lastCommittedTxId: 256541843
Jul 21, 8:33:30.358 AM	INFO	org.apache.hadoop.hdfs.qjournal.server.Journal	
getSegmentInfo(256541217): EditLogFile(file=/dfs/journal-edits/nutch-nameservice1/current/edits_inprogress_0000000000256541217,first=0000000000256541217,last=0000000000256541842,inProgress=true,hasCorruptHeader=false) -&amp;gt; startTxId: 256541217 endTxId: 256541842 isInProgress: true
Jul 21, 8:33:30.358 AM	INFO	org.apache.hadoop.hdfs.qjournal.server.Journal	
Synchronizing log startTxId: 256541217 endTxId: 256541843 isInProgress: true: old segment startTxId: 256541217 endTxId: 256541842 isInProgress: true is not the right length
Jul 21, 8:33:30.358 AM	WARN	org.apache.hadoop.ipc.Server	
IPC Server handler 1 on 8485, call org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocol.acceptRecovery from x.y.z.30:37022 Call#17 Retry#0
java.lang.IllegalAccessError: tried to access method com.google.common.collect.Range.&amp;lt;init&amp;gt;(Lcom/google/common/collect/Cut;Lcom/google/common/collect/Cut;)V from class com.google.common.collect.Ranges
	at com.google.common.collect.Ranges.create(Ranges.java:76)
	at com.google.common.collect.Ranges.closed(Ranges.java:98)
	at org.apache.hadoop.hdfs.qjournal.server.Journal.txnRange(Journal.java:872)
	at org.apache.hadoop.hdfs.qjournal.server.Journal.acceptRecovery(Journal.java:806)
	at org.apache.hadoop.hdfs.qjournal.server.JournalNodeRpcServer.acceptRecovery(JournalNodeRpcServer.java:206)
	at org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolServerSideTranslatorPB.acceptRecovery(QJournalProtocolServerSideTranslatorPB.java:261)
	at org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos$QJournalProtocolService$2.callBlockingMethod(QJournalProtocolProtos.java:25435)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1060)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080)&lt;/LI-CODE&gt;&lt;P&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jul 2023 18:08:20 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/HDFS-NameNode-roles-failing-to-start-after-host-restarts/m-p/374296#M241947</guid>
      <dc:creator>Wiggles</dc:creator>
      <dc:date>2023-07-21T18:08:20Z</dc:date>
    </item>
  </channel>
</rss>

