Created 12-06-2018 03:00 PM
We are using Ambari version 2.4.0.1 to manage a Hadoop HA cluster, and noticed that Ambari Infra Solr had been stopped for a few days.
After successfully starting the service again, the errors below are being reported continuously in solr.log.
[qtp1769597131-103] ERROR [c:hadoop_logs s:shard0 r:core_node2 x:hadoop_logs_shard0_replica1] org.apache.solr.common.SolrException (SolrException.java:148) - org.apache.solr.common.SolrException: No registered leader was found after waiting for 4000ms , collection: hadoop_logs slice: shard2 at org.apache.solr.common.cloud.ZkStateReader.getLeaderRetry(ZkStateReader.java:626) at org.apache.solr.common.cloud.ZkStateReader.getLeaderRetry(ZkStateReader.java:612) at org.apache.solr.update.processor.DistributedUpdateProcessor.setupRequest(DistributedUpdateProcessor.java:367) at org.apache.solr.update.processor.DistributedUpdateProcessor.setupRequest(DistributedUpdateProcessor.java:315) at org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:671) at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48) at org.apache.solr.update.processor.AddSchemaFieldsUpdateProcessorFactory$AddSchemaFieldsUpdateProcessor.processAdd(AddSchemaFieldsUpdateProcessorFactory.java:335) at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48) at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117) at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48) at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117) at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48) at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117) at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48) at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117) at 
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48) at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117) at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48) at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117) at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48) at org.apache.solr.update.processor.DocExpirationUpdateProcessorFactory$TTLUpdateProcessor.processAdd(DocExpirationUpdateProcessorFactory.java:347) at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48) at org.apache.solr.update.processor.AbstractDefaultValueUpdateProcessorFactory$DefaultValueUpdateProcessor.processAdd(AbstractDefaultValueUpdateProcessorFactory.java:93) at org.apache.solr.handler.loader.JavabinLoader$1.update(JavabinLoader.java:97) at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$1.readOuterMostDocIterator(JavaBinUpdateRequestCodec.java:179) at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$1.readIterator(JavaBinUpdateRequestCodec.java:135) at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:274) at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$1.readNamedList(JavaBinUpdateRequestCodec.java:121) at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:239) at org.apache.solr.common.util.JavaBinCodec.unmarshal(JavaBinCodec.java:157) at org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.unmarshal(JavaBinUpdateRequestCodec.java:186) at org.apache.solr.handler.loader.JavabinLoader.parseAndLoadDocs(JavabinLoader.java:107) at org.apache.solr.handler.loader.JavabinLoader.load(JavabinLoader.java:54) at org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:94) at 
org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:69) at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:155) at org.apache.solr.core.SolrCore.execute(SolrCore.java:2102) at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:654) at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:460) at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:257) at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:208) at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1652) at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:585) at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:577) at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:223) at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1127) at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:515) at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1061) at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:215) at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:110) at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97) at org.eclipse.jetty.server.Server.handle(Server.java:499) at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:310) at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:257) at org.eclipse.jetty.io.AbstractConnection$2.run(AbstractConnection.java:540) at 
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:635) at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:555) at java.lang.Thread.run(Thread.java:745)
How do we fix this issue?
Created 12-06-2018 06:37 PM
Hi @Sajesh PP
Currently, I see that the hadoop_logs collection, which LogSearch uses, is in a down state and not recovering; because of this, no leader is assigned to the collection.
To fix this issue, you can drop the collection.
If the cluster is Kerberized, follow the steps below:
kinit with ambari-infra keytab
# curl -i -v --negotiate -u : "http://<SOLR_HOST>:8886/solr/admin/collections?action=DELETE&name=hadoop_logs"
Restart LogSearch, which will recreate the hadoop_logs collection. If the cluster is not Kerberized, simply opening the same URL in a browser will also work.
The same method can be used for other collections if they are in the DOWN state. You can check the status of the collections in the Solr UI -> Cloud.
Created 12-06-2018 06:37 PM
Hi @Sajesh PP
Currently, I see that the hadoop_logs collection, which LogSearch uses, is in a down state and not recovering; because of this, no leader is assigned to the collection.
To fix this issue, you can drop the collection.
If the cluster is Kerberized, follow the steps below:
kinit with ambari-infra keytab
# curl -i -v --negotiate -u : "http://<SOLR_HOST>:8886/solr/admin/collections?action=DELETE&name=hadoop_logs"
Restart LogSearch, which will recreate the hadoop_logs collection. If the cluster is not Kerberized, simply opening the same URL in a browser will also work.
The same method can be used for other collections if they are in the DOWN state. You can check the status of the collections in the Solr UI -> Cloud.
Created 12-10-2018 07:11 AM
Hi @PRAVIN BHAGADE
I have dropped the collection and restarted LogSearch. The issue is fixed now.
Thanks for your help.
Created 01-09-2019 08:52 AM
Since then, we have noticed the Ambari Infra Solr service failing regularly.
The service is running on Masternode2.
Below are the errors in solr.log.
2019-01-08 06:30:28,057 [coreContainerWorkExecutor-2-thread-1-processing-n:prdhdpmn2.na.ad.example.com:8886_solr] ERROR [ ] org.apache.solr.core.CoreContainer$2 (CoreContainer.java:500) - Error waiting for SolrCore to be createdjava.util.concurrent.ExecutionException: org.apache.solr.common.SolrException: Unable to create core [audit_logs_shard0_replica1] at java.util.concurrent.FutureTask.report(FutureTask.java:122) at java.util.concurrent.FutureTask.get(FutureTask.java:192) at org.apache.solr.core.CoreContainer$2.run(CoreContainer.java:496) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at org.apache.solr.common.util.ExecutorUtil$MDCAwareThreadPoolExecutor$1.run(ExecutorUtil.java:231) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745)Caused by: org.apache.solr.common.SolrException: Unable to create core [audit_logs_shard0_replica1] at org.apache.solr.core.CoreContainer.create(CoreContainer.java:827) at org.apache.solr.core.CoreContainer.access$000(CoreContainer.java:87) at org.apache.solr.core.CoreContainer$1.call(CoreContainer.java:467) at org.apache.solr.core.CoreContainer$1.call(CoreContainer.java:458) ... 
5 moreCaused by: org.apache.solr.common.SolrException: Could not load conf for core audit_logs_shard0_replica1: Can't load schema managed-schema: [schema.xml] Duplicate field definition for '2018-07-03 23:00:00,684 INFO FSNamesystem.audit : allowed' [[[2018-07-03 23:00:00,684 INFO FSNamesystem.audit : allowed{type=boolean,properties=indexed,stored,omitNorms,omitTermFreqAndPositions,sortMissingLast}]]] and [[[2018-07-03 23:00:00,684 INFO FSNamesystem.audit : allowed{type=boolean,properties=indexed,stored,omitNorms,omitTermFreqAndPositions,sortMissingLast}]]] at org.apache.solr.core.ConfigSetService.getConfig(ConfigSetService.java:84) at org.apache.solr.core.CoreContainer.create(CoreContainer.java:812) ... 8 moreCaused by: org.apache.solr.common.SolrException: Can't load schema managed-schema: [schema.xml] Duplicate field definition for '2018-07-03 23:00:00,684 INFO FSNamesystem.audit : allowed' [[[2018-07-03 23:00:00,684 INFO FSNamesystem.audit : allowed{type=boolean,properties=indexed,stored,omitNorms,omitTermFreqAndPositions,sortMissingLast}]]] and [[[2018-07-03 23:00:00,684 INFO FSNamesystem.audit : allowed{type=boolean,properties=indexed,stored,omitNorms,omitTermFreqAndPositions,sortMissingLast}]]] at org.apache.solr.schema.IndexSchema.readSchema(IndexSchema.java:577) at org.apache.solr.schema.IndexSchema.<init>(IndexSchema.java:159) at org.apache.solr.schema.ManagedIndexSchema.<init>(ManagedIndexSchema.java:104) at org.apache.solr.schema.ManagedIndexSchemaFactory.create(ManagedIndexSchemaFactory.java:173) at org.apache.solr.schema.ManagedIndexSchemaFactory.create(ManagedIndexSchemaFactory.java:47) at org.apache.solr.schema.IndexSchemaFactory.buildIndexSchema(IndexSchemaFactory.java:70) at org.apache.solr.core.ConfigSetService.createIndexSchema(ConfigSetService.java:108) at org.apache.solr.core.ConfigSetService.getConfig(ConfigSetService.java:79) ... 
9 moreCaused by: org.apache.solr.common.SolrException: [schema.xml] Duplicate field definition for '2018-07-03 23:00:00,684 INFO FSNamesystem.audit : allowed' [[[2018-07-03 23:00:00,684 INFO FSNamesystem.audit : allowed{type=boolean,properties=indexed,stored,omitNorms,omitTermFreqAndPositions,sortMissingLast}]]] and [[[2018-07-03 23:00:00,684 INFO FSNamesystem.audit : allowed{type=boolean,properties=indexed,stored,omitNorms,omitTermFreqAndPositions,sortMissingLast}]]] at org.apache.solr.schema.IndexSchema.loadFields(IndexSchema.java:642) at org.apache.solr.schema.IndexSchema.readSchema(IndexSchema.java:473) ... 16 more2019-01-08 06:30:28,072 [recoveryExecutor-16-thread-1-processing-n:prdhdpmn2.na.ad.example.com:8886_solr x:ranger_audits_shard1_replica1 s:shard1 c:ranger_audits r:core_node1] WARN [c:ranger_audits s:shard1 r:core_node1 x:ranger_audits_shard1_replica1] org.apache.solr.update.UpdateLog$LogReplayer (UpdateLog.java:1308) - Starting log replay tlog{file=/data/ambari_infra_solr/data/ranger_audits_shard1_replica1/data/tlog/tlog.0000000000001289032 refcount=2} active=false starting pos=02019-01-08 06:30:34,982 [commitScheduler-22-thread-1] WARN [c:ranger_audits s:shard1 r:core_node1 x:ranger_audits_shard1_replica1] org.apache.solr.core.SolrCore (SolrCore.java:1795) - [ranger_audits_shard1_replica1] PERFORMANCE WARNING: Overlapping onDeckSearchers=22019-01-08 07:52:30,043 [recoveryExecutor-16-thread-1-processing-n:prdhdpmn2.na.ad.example.com:8886_solr x:ranger_audits_shard1_replica1 s:shard1 c:ranger_audits r:core_node1] WARN [c:ranger_audits s:shard1 r:core_node1 x:ranger_audits_shard1_replica1] org.apache.solr.update.UpdateLog$LogReplayer (UpdateLog.java:1298) - Log replay finished. recoveryInfo=RecoveryInfo{adds=1 deletes=0 deleteByQuery=0 errors=0 positionOfStart=0}