Support Questions
Find answers, ask questions, and share your expertise
Announcements
Alert: Welcome to the Unified Cloudera Community. Former HCC members be sure to read and learn how to activate your account here.

How to configure spellcheck functionality in Solr?

How to configure spellcheck functionality in Solr?

New Contributor

I want to implement spellcheck functionality in Apache Solr 6.6.4, but I have not been able to get it working. Solr does not suggest the correct word when a misspelled term is used in a query.

My collection is named employee. I want to implement spellcheck on the field jobTitleName.

I have referred to the following link: http://akashmusings.blogspot.com/2016/03/spell-check-with-solr.html

I am using the following configurations.

schema.xml

<!-- schema.xml excerpt for the employee collection. userId is the unique key. -->
<uniqueKey>userId</uniqueKey>

<!-- Document fields. Only userId and _version_ are indexed; every other field is stored-only. -->
<field name="userId" type="string" indexed="true" stored="true" required="false"/>
<!-- NOTE(review): jobTitleName is indexed="false" and of the untokenized "string" type, yet the
     spellcheck configuration in this post points DirectSolrSpellChecker at this field.
     DirectSolrSpellChecker presumably draws its suggestions from indexed terms, so this field
     likely needs indexed="true" (and possibly an analyzed text type) followed by a reindex.
     Confirm against the Solr spell checking documentation. -->
<field name="jobTitleName" type="string" indexed="false" stored="true" required="false"/>
<field name="firstName" type="string" indexed="false" stored="true" required="false"/>
<field name="lastName" type="string" indexed="false" stored="true" required="false"/>
<field name="preferredFullName" type="string" indexed="false" stored="true" required="false"/>
<field name="employeeCode" type="string" indexed="false" stored="true" required="false"/>
<field name="region" type="string" indexed="false" stored="true" required="false"/>
<field name="phoneNumber" type="string" indexed="false" stored="true" required="false"/>
<field name="emailAddress" type="string" indexed="false" stored="true" required="false"/>
<field name="_version_" type="tver" indexed="true" stored="true"/>

<!-- Any field name not declared above matches one of these patterns and gets the "ignored"
     type, which is neither indexed nor stored. -->
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<dynamicField name="*" type="ignored" multiValued="true"/>

<!-- Field types. No type named text_general appears in this excerpt, and the "text" type
     below declares no analyzer. -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/>
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
<fieldType name="int" class="solr.IntPointField" docValues="true"/>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" multiValued="true"/>
<fieldType name="double" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tdates" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tlongs" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tver" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tdoubles" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" docValues="false" multiValued="true"/>

solrconfig.xml (spellcheck)

<!-- Spellcheck search component. It declares two spellcheckers: "default"
     (DirectSolrSpellChecker on jobTitleName) and "wordbreak"
     (WordBreakSolrSpellChecker on lowerfilt). -->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  <!-- NOTE(review): text_general is not declared in the schema.xml excerpt in this post;
       confirm that the field type exists in the full schema. -->
  <str name="queryAnalyzerFieldType">text_general</str>
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">jobTitleName</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <str name="distanceMeasure">internal</str>
    <float name="accuracy">0.5</float>
    <int name="maxEdits">2</int>
    <int name="minPrefix">1</int>
    <int name="maxInspections">5</int>
    <int name="minQueryLength">4</int>
    <float name="maxQueryFrequency">0.01</float>
    <float name="thresholdTokenFrequency">.01</float>
  </lst>
  <lst name="spellchecker">
    <str name="name">wordbreak</str>
    <str name="classname">solr.WordBreakSolrSpellChecker</str>
    <!-- NOTE(review): no field named lowerfilt is declared in the schema.xml excerpt in this
         post; there it would match the catch-all "*" dynamic field. Verify. -->
    <str name="field">lowerfilt</str>
    <str name="combineWords">true</str>
    <str name="breakWords">true</str>
    <int name="maxChanges">10</int>
  </lst>
</searchComponent>
<!-- /spell request handler: a lazily started SearchHandler that runs the "spellcheck"
     component as a last-component with the defaults below. Only the "default" dictionary
     is selected; the "wordbreak" spellchecker is not referenced by these defaults. -->
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck">on</str>
    <str name="spellcheck.extendedResults">true</str>
    <str name="spellcheck.count">10</str>
    <str name="spellcheck.alternativeTermCount">5</str>
    <str name="spellcheck.maxResultsForSuggest">5</str>
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.collateExtendedResults">true</str>
    <str name="spellcheck.maxCollationTries">10</str>
    <str name="spellcheck.maxCollations">5</str>
  </lst>
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>

solrconfig.xml (update)

  <!-- Sets firstName as the default query field (df) for the listed handler paths,
       /spell included. -->
  <!-- NOTE(review): firstName is declared indexed="false" in the schema.xml excerpt of this
       post, and the spellcheck target is jobTitleName; confirm this df is what /spell
       queries should search. -->
  <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse">
    <lst name="defaults">
      <str name="df">firstName</str>
    </lst>
  </initParams>

  <!-- Makes add-unknown-fields-to-the-schema the default update.chain for every handler
       under /update/. The chain itself is not defined in this excerpt. -->
  <initParams path="/update/**">
    <lst name="defaults">
      <str name="update.chain">add-unknown-fields-to-the-schema</str>
    </lst>
  </initParams>    
  <!-- Spellcheck component with two spellcheckers: "default" (DirectSolrSpellChecker
       reading jobTitleName) and "wordbreak" (WordBreakSolrSpellChecker reading lowerfilt). -->
  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
    <!-- NOTE(review): the schema.xml excerpt in this post declares no text_general type;
         confirm it exists in the full schema. -->
    <str name="queryAnalyzerFieldType">text_general</str>

    <!-- Index-based spellchecker used as the "default" dictionary. -->
    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">jobTitleName</str>
      <str name="classname">solr.DirectSolrSpellChecker</str>
      <str name="distanceMeasure">internal</str>
      <float name="accuracy">0.5</float>
      <int name="maxEdits">2</int>
      <int name="minPrefix">1</int>
      <int name="maxInspections">5</int>
      <int name="minQueryLength">4</int>
      <float name="maxQueryFrequency">0.01</float>
      <float name="thresholdTokenFrequency">.01</float>
    </lst>

    <!-- Word-break spellchecker. NOTE(review): lowerfilt is not a declared field in the
         schema.xml excerpt in this post; verify the field exists. -->
    <lst name="spellchecker">
      <str name="name">wordbreak</str>
      <str name="classname">solr.WordBreakSolrSpellChecker</str>
      <str name="field">lowerfilt</str>
      <str name="combineWords">true</str>
      <str name="breakWords">true</str>
      <int name="maxChanges">10</int>
    </lst>
  </searchComponent>

  <!-- /spell request handler: a SearchHandler declared with startup="lazy" that appends the
       "spellcheck" component to its component chain via last-components. -->
  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
    <!-- Request defaults. Only the "default" dictionary is selected here; the "wordbreak"
         spellchecker is not referenced by these defaults. -->
    <lst name="defaults">
      <str name="spellcheck.dictionary">default</str>
      <str name="spellcheck">on</str>
      <str name="spellcheck.extendedResults">true</str>
      <str name="spellcheck.count">10</str>
      <str name="spellcheck.alternativeTermCount">5</str>
      <str name="spellcheck.maxResultsForSuggest">5</str>
      <str name="spellcheck.collate">true</str>
      <str name="spellcheck.collateExtendedResults">true</str>
      <str name="spellcheck.maxCollationTries">10</str>
      <str name="spellcheck.maxCollations">5</str>
    </lst>
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>

Requirement: When the user enters a misspelled jobTitleName, Solr should suggest the correct value using spellcheck.

For example, if the user enters Developr as the jobTitleName, Solr should recommend Developer to the user via spellcheck.

Don't have an account?
Coming from Hortonworks? Activate your account here