<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Solr index not all dataset in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/23957#M4645</link>
    <description>&lt;P&gt;Hi!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;There is no hidden meaning. I just don't have any unique id, so I use a random one for that...&lt;/P&gt;</description>
    <pubDate>Thu, 22 Jan 2015 17:01:42 GMT</pubDate>
    <dc:creator>fil</dc:creator>
    <dc:date>2015-01-22T17:01:42Z</dc:date>
    <item>
      <title>Solr index not all dataset</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/23778#M4643</link>
      <description>&lt;P&gt;Hi dear experts!&lt;/P&gt;&lt;P&gt;Recently, I ran into very strange behavior in Solr: it does not index the whole dataset.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am trying to index CSV files (volume around 0.5 TB).&lt;/P&gt;&lt;P&gt;But in the results I can observe that not all fields are indexed (to be clear, only one copy of the data is indexed)...&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Example:&lt;/P&gt;&lt;P&gt;Impala query:&lt;/P&gt;&lt;PRE&gt;select * from dpi2
where
msisdn=9851031305&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;return:&lt;/P&gt;&lt;P&gt;&amp;nbsp; msisdn sgsn local_ip local_port external_ip external_port translated_ip translated_port site uri _c10 _c11 bytes&lt;/P&gt;&lt;TABLE cellspacing="0" cellpadding="0"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;0&lt;/TD&gt;&lt;TD&gt;9851031305&lt;/TD&gt;&lt;TD&gt;NameOfSomeNode2&lt;/TD&gt;&lt;TD&gt;192.168.11.187&lt;/TD&gt;&lt;TD&gt;6455&lt;/TD&gt;&lt;TD&gt;10.62.63.170&lt;/TD&gt;&lt;TD&gt;143&lt;/TD&gt;&lt;TD&gt;213.10.126.195&lt;/TD&gt;&lt;TD&gt;4041&lt;/TD&gt;&lt;TD&gt;bing.com&lt;/TD&gt;&lt;TD&gt;MXAXWVMLKRMM9NLYQGFF&lt;/TD&gt;&lt;TD&gt;2014-11-30T03:11:18Z&lt;/TD&gt;&lt;TD&gt;2014-11-30T03:17:17Z&lt;/TD&gt;&lt;TD&gt;8073&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;9851031305&lt;/TD&gt;&lt;TD&gt;NameOfSomeNode8&lt;/TD&gt;&lt;TD&gt;192.168.11.187&lt;/TD&gt;&lt;TD&gt;6455&lt;/TD&gt;&lt;TD&gt;10.62.63.170&lt;/TD&gt;&lt;TD&gt;143&lt;/TD&gt;&lt;TD&gt;213.10.126.195&lt;/TD&gt;&lt;TD&gt;4041&lt;/TD&gt;&lt;TD&gt;bing.com&lt;/TD&gt;&lt;TD&gt;WOOHQAFFWTQUNO93XNDH&lt;/TD&gt;&lt;TD&gt;2014-11-30T03:11:18Z&lt;/TD&gt;&lt;TD&gt;2014-11-30T03:12:42Z&lt;/TD&gt;&lt;TD&gt;445&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;9851031305&lt;/TD&gt;&lt;TD&gt;NameOfSomeNode2&lt;/TD&gt;&lt;TD&gt;192.168.11.187&lt;/TD&gt;&lt;TD&gt;6455&lt;/TD&gt;&lt;TD&gt;10.62.63.170&lt;/TD&gt;&lt;TD&gt;143&lt;/TD&gt;&lt;TD&gt;213.10.126.195&lt;/TD&gt;&lt;TD&gt;4041&lt;/TD&gt;&lt;TD&gt;bing.com&lt;/TD&gt;&lt;TD&gt;XKSLQUY2ROYYD1YMPVOI&lt;/TD&gt;&lt;TD&gt;2014-11-30T03:11:18Z&lt;/TD&gt;&lt;TD&gt;2014-11-30T03:17:21Z&lt;/TD&gt;&lt;TD&gt;2065&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;thouthand of rows.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;But solr search (msisdn:9851031305) return only one 
row...&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;{"showDetails":false,"session_time":"[u'2014-11-30T03:11:18Z']","event_time":"[u'2014-11-30T03:15:58Z']","local_ip":"[u'192.168.11.187']","msisdn":["&lt;/SPAN&gt;&lt;EM&gt;9851031305&lt;/EM&gt;&lt;SPAN&gt;"],"sgsn":"[u'NameOfSomeNode3']","translated_ip":"[u'213.10.126.195']","bytes":"[4866]","site":"[u'bing.com']","url":"[u'RHHIAU0AIZCSXDDVLUU0']","translated_port":"[u'4041']","external_ip":"[u'10.62.63.170']","local_port":"[u'6455']","_version_":1490445936214671400,"external_port":"[u'143']","id":"aec90d3e-2ecd-4be9-8f4a-572819c1a127","details":[]}&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;schema.xml file:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt; &amp;lt;fields&amp;gt;
   &amp;lt;field name="msisdn"                         type="string"  indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="sgsn"                           type="string"  indexed="true" stored="true" multiValued="true"  /&amp;gt;
   &amp;lt;field name="local_ip"                       type="string"  indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="local_port"             type="string"  indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="external_ip"            type="string"   indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="external_port"          type="string"  indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="translated_ip"          type="string"  indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="translated_port"        type="string"  indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="site"                           type="string"  indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="url"                            type="string"  indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="session_time"           type="date"  indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="event_time"             type="date"  indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="bytes"                          type="tint"     indexed="true" stored="true" multiValued="true" /&amp;gt;
   &amp;lt;field name="_version_"                      type="long"     indexed="true" stored="true" multiValued="false"/&amp;gt;
   &amp;lt;field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /&amp;gt;
   &amp;lt;dynamicField name="ignored_*" type="ignored"/&amp;gt;
&amp;lt;/fields&amp;gt;

&amp;lt;uniqueKey&amp;gt;id&amp;lt;/uniqueKey&amp;gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any support is much appreciated!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 16 Sep 2022 09:19:28 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/23778#M4643</guid>
      <dc:creator>fil</dc:creator>
      <dc:date>2022-09-16T09:19:28Z</dc:date>
    </item>
    <item>
      <title>Re: Solr index not all dataset</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/23954#M4644</link>
      <description>&lt;P&gt;I see your schema is using "id" as the unique key. &amp;nbsp;What values do you populate that field with?&lt;/P&gt;</description>
      <pubDate>Thu, 22 Jan 2015 16:36:44 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/23954#M4644</guid>
      <dc:creator>kman</dc:creator>
      <dc:date>2015-01-22T16:36:44Z</dc:date>
    </item>
    <item>
      <title>Re: Solr index not all dataset</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/23957#M4645</link>
      <description>&lt;P&gt;Hi!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;There is no hidden meaning. I just don't have any unique id, so I use a random one for that...&lt;/P&gt;</description>
      <pubDate>Thu, 22 Jan 2015 17:01:42 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/23957#M4645</guid>
      <dc:creator>fil</dc:creator>
      <dc:date>2015-01-22T17:01:42Z</dc:date>
    </item>
    <item>
      <title>Re: Solr index not all dataset</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/24147#M4646</link>
      <description>Are you using a morphline for indexing? If so, can you post your morphline file here?</description>
      <pubDate>Wed, 28 Jan 2015 15:00:05 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/24147#M4646</guid>
      <dc:creator>kman</dc:creator>
      <dc:date>2015-01-28T15:00:05Z</dc:date>
    </item>
    <item>
      <title>Re: Solr index not all dataset</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/24400#M4647</link>
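      <description>In your morphline file (reviews.conf), change the order of the commands: 

move { generateUUID { field : id } } down so that it runs after readCSV, as shown below. With generateUUID placed before readCSV, the id appears to be generated once per input file rather than once per CSV record, so records sharing that id overwrite each other in Solr (id is the uniqueKey). 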


morphlines : [ 
{ 
id : dpi1 
importCommands : ["org.kitesdk.**", "org.apache.solr.**"] 
commands : [ 
{ 
readCSV { 
separator : "," 
columns : [msisdn,sgsn,local_ip,local_port,external_ip,external_port,translated_ip,translated_port,site,url,session_time,event_time,bytes] 
quoteChar : "\"" 
charset : UTF-8 
} 
} 
{ generateUUID { field : id } } 
{ 
if { 
conditions : [ 
{ 
equals { id : [] } 
} 
] 
then : [ 
{ 
logDebug { format : "output record: {}", args : ["@{}"] } 
} 
] 
} 
}</description>
      <pubDate>Fri, 06 Feb 2015 02:24:31 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/24400#M4647</guid>
      <dc:creator>neerjakhattar</dc:creator>
      <dc:date>2015-02-06T02:24:31Z</dc:date>
    </item>
    <item>
      <title>Re: Solr index not all dataset</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/24409#M4648</link>
      <description>Thanks it works!</description>
      <pubDate>Fri, 06 Feb 2015 08:34:02 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Solr-index-not-all-dataset/m-p/24409#M4648</guid>
      <dc:creator>fil</dc:creator>
      <dc:date>2015-02-06T08:34:02Z</dc:date>
    </item>
  </channel>
</rss>

