Question

I am using Solr 3.6.1 and am very satisfied with it. Now I want to move to Solr 4.1, so I took my “schema.xml” and “solrconfig.xml” (with minor changes) and placed them under my new Solr 4.1 configuration. Indexing (via DIH) was successful, but I have noticed an issue. In “schema.xml” I have “copyField” directives in order to index the same fields using different “types”. When I index using the same configuration on Solr 4.1, the index is half the size of the index on Solr 3.6.1 (and when I query I get different results). Has anything changed in Solr 4.1? I need a little help with this.

The schema.xml:

<?xml version="1.0" encoding="UTF-8" ?>

<schema name="areios_pagos" version="1.5">
  <types>
    <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
    <!-- boolean type: "true" or "false" -->
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
    <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings.
         Declared with the same "fieldType" element-name casing as its siblings. -->
    <fieldType name="binary" class="solr.BinaryField"/>

    <!--
      Default numeric field types (precisionStep="0").
      For faster range queries, consider the tint/tfloat/tlong/tdouble types.
    -->
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>

    <!-- Trie numeric types with precisionStep="8". -->
    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>

    <!-- Trie date types: precisionStep 0 (date) and 6 (tdate). -->
    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>

    <!-- Plain numeric/date classes (IntField, LongField, FloatField, DoubleField, DateField). -->
    <fieldType name="pint" class="solr.IntField"/>
    <fieldType name="plong" class="solr.LongField"/>
    <fieldType name="pfloat" class="solr.FloatField"/>
    <fieldType name="pdouble" class="solr.DoubleField"/>
    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>

    <!-- Sortable* numeric classes; norms are omitted and missing values sort last. -->
    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>

    <!-- Type backed by RandomSortField. -->
    <fieldType name="random" class="solr.RandomSortField" indexed="true"/>

    <!-- Greek text: standard tokenizer, Greek lowercasing, stop words, Greek stemming. -->
    <fieldType name="text_el"
               class="solr.TextField"
               positionIncrementGap="1000">
      <!-- No type attribute: the same chain is used at index and query time. -->
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <!-- Greek-specific lowercasing (handles sigma). -->
        <filter class="solr.GreekLowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="false"
                words="lang/stopwords_el.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.GreekStemFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Field type for Tika/PDF-extracted text: markup is stripped before tokenizing. -->
    <fieldType name="text"
               class="solr.TextField"
               positionIncrementGap="1000">
      <!-- One analyzer without a type attribute serves both index and query time;
           the index and query chains for this type are identical. -->
      <analyzer>
        <charFilter class="solr.HTMLStripCharFilterFactory"/>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.GreekLowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_el.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.GreekStemFilterFactory"/>
        <!-- Disabled alternative stemmer: -->
        <!--<filter class="solr.HunspellStemFilterFactory" dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff" ignoreCase="true" />-->
      </analyzer>
    </fieldType>

    <!-- This field type, in conjunction with the copyField definition,
         helps with term relevancy: whitespace tokens, lowercased, no stemming,
         and at most 20 tokens are kept. -->
    <fieldType name="text_areios_pagos_s"
               class="solr.TextField"
               positionIncrementGap="100">
      <!-- One analyzer without a type attribute serves both index and query time;
           the index and query chains for this type are identical. -->
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="20"/>
        <filter class="solr.GreekLowerCaseFilterFactory"/>
        <!-- Disabled edge n-gram filter: -->
        <!-- <filter class="solr.EdgeNGramFilterFactory" minGramSize="3" maxGramSize="100"/> -->
      </analyzer>
    </fieldType>
    <!-- END -->

    <!-- Main analyzed Greek text type used by most fields:
         standard tokenizer, Greek lowercasing, stop words, Greek stemming. -->
    <fieldType name="text_areios_pagos"
               class="solr.TextField"
               positionIncrementGap="100">
      <!-- One analyzer without a type attribute serves both index and query time;
           the active index and query chains for this type are identical. -->
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.GreekLowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_el.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.GreekStemFilterFactory"/>
        <!-- Disabled alternative stemmer: -->
        <!--<filter class="solr.HunspellStemFilterFactory" dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff" ignoreCase="true" />-->
      </analyzer>
    </fieldType>

    <!-- Non-indexed, non-stored, multi-valued string type.
         Declared with the same "fieldType" element-name casing as the other types. -->
    <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField"/>

    <!-- Spatial types.
         NOTE(review): point and location name sub-field suffixes (_d, _coordinate);
         no matching field declarations are visible in this schema, so confirm
         before assigning these types to a field. -->
    <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>

    <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>

    <fieldType name="geohash" class="solr.GeoHashField"/>

    <!-- Currency type; reads its configuration from currency.xml and defaults to USD. -->
    <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml"/>
 </types>



 <fields>
   <!-- Identifiers and other verbatim string fields. -->
   <field name="ida" type="string" indexed="true" stored="true" multiValued="false"/>
   <field name="solr_id" type="string" indexed="true" stored="true" multiValued="false"/>

   <!-- Analyzed decision metadata. -->
   <field name="apofasi_number" type="text_areios_pagos" indexed="true" stored="true" multiValued="true"/>
   <field name="apofasi_date" type="text_areios_pagos" indexed="true" stored="true"/>
   <field name="apofasi_tmima" type="text_areios_pagos" indexed="true" stored="true"/>
   <field name="apofasi_taxonomy" type="text_areios_pagos" indexed="true" stored="true"/>

   <!-- Catch-all field; multiValued="true" is set so that it can be a copyField destination. -->
   <field name="content" type="text_areios_pagos" indexed="true" stored="true" multiValued="true"/>

   <field name="type" type="string" indexed="true" stored="true"/>
   <field name="model" type="string" indexed="true" stored="true" multiValued="false"/>
   <field name="url" type="string" indexed="true" stored="true"/>
   <field name="search_tag" type="text_areios_pagos" indexed="true" stored="true"/>

   <!-- Uses the "text" type (markup-stripping analyzer). -->
   <field name="contentbin" type="text" indexed="true" stored="true" multiValued="true"/>
   <field name="last_modified" type="string" indexed="true" stored="true"/>
   <field name="title" type="text_areios_pagos" indexed="true" stored="true" multiValued="true"/>
   <field name="grid_title" type="text_areios_pagos" indexed="true" stored="true"/>

   <!-- Unstemmed variant of content (type text_areios_pagos_s); single-valued. -->
   <field name="contentS" type="text_areios_pagos_s" indexed="true" stored="true"/>
 </fields>

 <!-- Document key. -->
 <uniqueKey>solr_id</uniqueKey>

 <!-- Query-parser defaults: search the content field, require all terms (AND).
      NOTE(review): both elements are reported as deprecated in newer Solr
      releases in favour of the df and q.op request parameters; confirm before
      relying on them after an upgrade. -->
 <defaultSearchField>content</defaultSearchField>
 <solrQueryParser defaultOperator="AND"/>

 <!-- Aggregate the individual fields into the catch-all content field. -->
 <copyField source="apofasi_number" dest="content"/>
 <copyField source="apofasi_date" dest="content"/>
 <copyField source="apofasi_tmima" dest="content"/>
 <copyField source="apofasi_taxonomy" dest="content"/>
 <copyField source="title" dest="content"/>
 <copyField source="search_tag" dest="content"/>
 <copyField source="contentbin" dest="content"/>

 <!-- Mirror content into contentS.
      NOTE(review): copyField rules are not chained, so values that reach content
      through the rules above are presumably not forwarded to contentS; only a
      content value supplied directly in the document is. Confirm this is intended. -->
 <copyField source="content" dest="contentS"/>


</schema>

The solrconfig.xml

<?xml version="1.0" encoding="UTF-8" ?>

<config>

  <!-- Abort on configuration errors unless overridden by the system property. -->
  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>

  <!-- Lucene compatibility version used by analysis components. -->
  <luceneMatchVersion>LUCENE_41</luceneMatchVersion>

  <!-- Index data directory; empty default when solr.data.dir is not set. -->
  <dataDir>${solr.data.dir:}</dataDir>

  <!-- Directory implementation; StandardDirectoryFactory unless overridden. -->
  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>

  <!-- No index settings are overridden. -->
  <indexConfig/>

  <jmx/>

  <!-- Update handler with no additional settings. -->
  <updateHandler class="solr.DirectUpdateHandler2"/>

  <!-- =====================================================================
       Query section: caches, searcher warming and other query-time settings
       ===================================================================== -->
  <query>

    <maxBooleanClauses>2048</maxBooleanClauses>

    <!-- Caches. -->
    <filterCache class="solr.FastLRUCache"
                 size="2048"
                 initialSize="1024"
                 autowarmCount="512"
                 cleanupThread="true"/>

    <queryResultCache class="solr.FastLRUCache"
                      size="2048"
                      initialSize="1024"
                      autowarmCount="512"
                      cleanupThread="true"/>

    <!-- NOTE(review): the document cache is keyed by internal document ids, so
         autowarmCount presumably has no effect here; confirm. -->
    <documentCache class="solr.FastLRUCache"
                   size="2048"
                   initialSize="2048"
                   autowarmCount="512"/>

    <fieldValueCache class="solr.FastLRUCache"
                     size="2048"
                     initialSize="512"
                     autowarmCount="512"
                     cleanupThread="true"/>

    <enableLazyFieldLoading>true</enableLazyFieldLoading>

    <queryResultWindowSize>150</queryResultWindowSize>

    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

    <!-- Warming queries run against every new searcher.
         NOTE(review): the sort uses apofasi_date and apofasi_tmima, which the
         schema declares with a tokenized text type; sorting on analyzed fields
         may be unreliable, so verify. -->
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst>
          <str name="q">χρησικτησια νομη</str>
          <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
          <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
          <str name="start">0</str>
          <str name="rows">150</str>
        </lst>
        <lst>
          <str name="q">νομη</str>
          <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
          <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
          <str name="start">0</str>
          <str name="rows">150</str>
        </lst>
        <lst>
          <str name="q">χρησικτησια νομη</str>
          <str name="fq">apofasi_taxonomy:ΠΟΙΝΙΚΕΣ</str>
          <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
          <str name="start">0</str>
          <str name="rows">150</str>
        </lst>
      </arr>
    </listener>

    <!-- The same three warming queries, run against the first searcher. -->
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst>
          <str name="q">χρησικτησια νομη</str>
          <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
          <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
          <str name="start">0</str>
          <str name="rows">150</str>
        </lst>
        <lst>
          <str name="q">νομη</str>
          <str name="fq">apofasi_taxonomy:ΠΟΛΙΤΙΚΕΣ</str>
          <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
          <str name="start">0</str>
          <str name="rows">150</str>
        </lst>
        <lst>
          <str name="q">χρησικτησια νομη</str>
          <str name="fq">apofasi_taxonomy:ΠΟΙΝΙΚΕΣ</str>
          <str name="sort">apofasi_date asc,ida desc,apofasi_tmima desc</str>
          <str name="start">0</str>
          <str name="rows">150</str>
        </lst>
      </arr>
    </listener>

    <!-- Requests wait for a warmed searcher rather than using a cold one. -->
    <useColdSearcher>false</useColdSearcher>

    <maxWarmingSearchers>2</maxWarmingSearchers>

  </query>

  <!-- HTTP request handling.
       NOTE(review): enableRemoteStreaming="true" lets requests name remote
       content streams; confirm it is required and that the instance is not
       reachable by untrusted clients. -->
  <requestDispatcher>
    <requestParsers enableRemoteStreaming="true"
                    multipartUploadLimitInKB="2048000"/>
    <httpCaching never304="true"/>
  </requestDispatcher>

  <!-- DataImportHandler endpoint, configured by data-config.xml. -->
  <requestHandler name="/dataimport"
                  class="org.apache.solr.handler.dataimport.DataImportHandler">
    <lst name="defaults">
      <str name="config">data-config.xml</str>
    </lst>
  </requestHandler>

  <!-- Default search handler: edismax over content and contentS, JSON output,
       highlighting on content and title.
       Each default is declared exactly once; the defType, qf, pf and ps entries
       must not be repeated, because repeated entries in the list become
       multi-valued parameters. -->
  <requestHandler name="/select" class="solr.SearchHandler">
    <lst name="defaults">
      <!-- Query parsing: contentS (unstemmed) is boosted above content. -->
      <str name="defType">edismax</str>
      <str name="qf">content contentS^10</str>
      <str name="pf">content^10 contentS^100</str>
      <str name="ps">100</str>

      <!-- Response shape. -->
      <str name="echoParams">explicit</str>
      <int name="rows">150</int>
      <str name="sort">score desc</str>
      <str name="wt">json</str>
      <str name="fl">solr_id,ida,type,model,keywordlist,title,apofasi_taxonomy,apofasi_tmima,apofasi_date,grid_title</str>

      <!-- Highlighting. -->
      <str name="hl">true</str>
      <str name="hl.fl">content,title</str>
      <str name="f.content.hl.alternateField">content</str>
      <str name="hl.maxAlternateFieldLength">800</str>
      <str name="hl.fragsize">800</str>
    </lst>
  </requestHandler>

  <!-- Format-specific update endpoints.
       NOTE(review): these per-format handler classes are reported as deprecated
       in Solr 4 in favour of solr.UpdateRequestHandler; confirm against the
       target release before migrating, since content-type handling differs. -->
  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler"/>

  <requestHandler name="/update/javabin" class="solr.BinaryUpdateRequestHandler"/>

  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy"/>

  <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler" startup="lazy"/>

  <!-- Rich-document extraction endpoint, loaded lazily.
       NOTE(review): the mappings below target fields named text and links and an
       ignored_ prefix; none of these appear in the schema shown alongside this
       config, so verify them before using this handler. -->
  <requestHandler name="/update/extract"
                  class="solr.extraction.ExtractingRequestHandler"
                  startup="lazy">
    <lst name="defaults">
      <str name="fmap.content">text</str>
      <str name="lowernames">true</str>
      <str name="uprefix">ignored_</str>
      <str name="fmap.Last-Modified">last_modified</str>
      <str name="captureAttr">true</str>
      <str name="fmap.a">links</str>
      <str name="fmap.div">ignored_</str>
    </lst>
  </requestHandler>

  <!-- XSLT-transformed updates, loaded lazily. -->
  <requestHandler name="/update/xslt" class="solr.XsltUpdateRequestHandler" startup="lazy"/>

  <!-- Analysis endpoints, loaded lazily. -->
  <requestHandler name="/analysis/field" class="solr.FieldAnalysisRequestHandler" startup="lazy"/>

  <requestHandler name="/analysis/document" class="solr.DocumentAnalysisRequestHandler" startup="lazy"/>

  <!-- Admin handlers. -->
  <requestHandler name="/admin/" class="solr.admin.AdminHandlers"/>

  <!-- Health check: always runs the fixed query "solrpingquery". -->
  <requestHandler name="/admin/ping"
                  class="solr.PingRequestHandler">
    <lst name="invariants">
      <str name="q">solrpingquery</str>
    </lst>
    <lst name="defaults">
      <str name="echoParams">all</str>
    </lst>
  </requestHandler>

  <!-- Debug endpoint that echoes the request contents back to the client. -->
  <requestHandler name="/debug/dump"
                  class="solr.DumpRequestHandler">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <str name="echoHandler">true</str>
    </lst>
  </requestHandler>

  <!-- Spell-check component.
       NOTE(review): the field type textSpell and the field name referenced here
       are not declared in the schema shown alongside this config; verify. -->
  <searchComponent name="spellcheck"
                   class="solr.SpellCheckComponent">
    <str name="queryAnalyzerFieldType">textSpell</str>
    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">name</str>
      <str name="spellcheckIndexDir">spellchecker</str>
    </lst>
  </searchComponent>

  <!-- Search handler that appends the spellcheck component, loaded lazily.
       NOTE(review): df points at a field named text, which is not declared in
       the schema shown alongside this config; verify. -->
  <requestHandler name="/spell"
                  class="solr.SearchHandler"
                  startup="lazy">
    <lst name="defaults">
      <str name="df">text</str>
      <str name="spellcheck.onlyMorePopular">false</str>
      <str name="spellcheck.extendedResults">false</str>
      <str name="spellcheck.count">1</str>
    </lst>
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>

  <!-- Terms component. -->
  <searchComponent name="terms" class="solr.TermsComponent"/>

  <!-- Handler that runs only the terms component, with terms enabled by default;
       loaded lazily. -->
  <requestHandler name="/terms"
                  class="solr.SearchHandler"
                  startup="lazy">
    <lst name="defaults">
      <bool name="terms">true</bool>
    </lst>
    <arr name="components">
      <str>terms</str>
    </arr>
  </requestHandler>

  <!-- Query elevation component, configured by elevate.xml. -->
  <searchComponent name="elevator"
                   class="solr.QueryElevationComponent">
    <!-- Field type whose analyzer is applied to incoming queries. -->
    <str name="queryFieldType">string</str>
    <str name="config-file">elevate.xml</str>
  </searchComponent>

  <!-- Search handler that appends the elevator component, loaded lazily.
       NOTE(review): df points at a field named text, which is not declared in
       the schema shown alongside this config; verify. -->
  <requestHandler name="/elevate"
                  class="solr.SearchHandler"
                  startup="lazy">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <str name="df">text</str>
    </lst>
    <arr name="last-components">
      <str>elevator</str>
    </arr>
  </requestHandler>

  <!-- Highlighting component: declares the available fragmenters, formatter,
       encoder, fragment-list builders, fragments builders and boundary scanners.
       Entries marked default="true" are the ones selected by default. -->
  <searchComponent class="solr.HighlightComponent" name="highlight">
    <highlighting>
      <!-- Default fragmenter; no defaults are overridden. -->
      <fragmenter name="gap" 
                  default="true"
                  class="solr.highlight.GapFragmenter">
        <lst name="defaults">
        </lst>
      </fragmenter>
      <!-- Alternative fragmenter driven by a regular expression. -->
      <fragmenter name="regex" 
                  class="solr.highlight.RegexFragmenter">
        <lst name="defaults">
          <!-- slightly smaller fragsizes work better because of slop -->
          <int name="hl.fragsize">70</int>
          <!-- allow 50% slop on fragment sizes -->
          <float name="hl.regex.slop">0.5</float>
          <!-- a basic sentence pattern -->
          <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
        </lst>
      </fragmenter>
      <!-- Configure the standard formatter: matches are wrapped in <shl>...</shl> -->
      <formatter name="html" 
                 default="true"
                 class="solr.highlight.HtmlFormatter">
        <lst name="defaults">         
          <str name="hl.simple.pre">&lt;shl&gt;</str>
          <str name="hl.simple.post">&lt;/shl&gt;</str>       
        </lst>
      </formatter>

      <!-- Configure the standard encoder -->
      <encoder name="html" 
               class="solr.highlight.HtmlEncoder" />

      <!-- Configure the standard fragListBuilder -->
      <fragListBuilder name="simple" 
                       default="true"
                       class="solr.highlight.SimpleFragListBuilder"/>

      <!-- Configure the single fragListBuilder -->
      <fragListBuilder name="single" 
                       class="solr.highlight.SingleFragListBuilder"/>

      <!-- default tag FragmentsBuilder -->
      <fragmentsBuilder name="default" 
                        default="true"
                        class="solr.highlight.ScoreOrderFragmentsBuilder">    
      </fragmentsBuilder>

      <!-- Alternative fragments builder with a comma-separated list of coloured
           opening tags and a single closing tag. -->
      <fragmentsBuilder name="colored" 
                        class="solr.highlight.ScoreOrderFragmentsBuilder">
        <lst name="defaults">
          <str name="hl.tag.pre"><![CDATA[
               <b style="background:yellow">,<b style="background:lawgreen">,
               <b style="background:aquamarine">,<b style="background:magenta">,
               <b style="background:palegreen">,<b style="background:coral">,
               <b style="background:wheat">,<b style="background:khaki">,
               <b style="background:lime">,<b style="background:deepskyblue">]]></str>
          <str name="hl.tag.post"><![CDATA[</b>]]></str>
        </lst>
      </fragmentsBuilder>

      <!-- Default boundary scanner: boundary characters are . , ! ? space,
           tab, line feed and carriage return. -->
      <boundaryScanner name="default" 
                       default="true"
                       class="solr.highlight.SimpleBoundaryScanner">
        <lst name="defaults">
          <str name="hl.bs.maxScan">10</str>
          <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
        </lst>
      </boundaryScanner>

      <!-- Alternative boundary scanner: WORD boundaries for locale en/US.
           NOTE(review): the indexed text appears to be Greek; confirm the
           locale if this scanner is ever selected. -->
      <boundaryScanner name="breakIterator" 
                       class="solr.highlight.BreakIteratorBoundaryScanner">
        <lst name="defaults">

          <str name="hl.bs.type">WORD</str>

          <str name="hl.bs.language">en</str>
          <str name="hl.bs.country">US</str>
        </lst>
      </boundaryScanner>
    </highlighting>
  </searchComponent>

  <!-- JSON responses are served as text/plain in UTF-8. -->
  <queryResponseWriter name="json"
                       class="solr.JSONResponseWriter">
    <str name="content-type">text/plain; charset=UTF-8</str>
  </queryResponseWriter>

  <!-- Velocity writer, loaded lazily. -->
  <queryResponseWriter name="velocity"
                       class="solr.VelocityResponseWriter"
                       startup="lazy"/>

  <!-- XSLT writer; cache lifetime is 5 seconds. -->
  <queryResponseWriter name="xslt"
                       class="solr.XSLTResponseWriter">
    <int name="xsltCacheLifetimeSeconds">5</int>
  </queryResponseWriter>

  <!-- Admin UI default query: match all documents. -->
  <admin>
    <defaultQuery>*:*</defaultQuery>
  </admin>

</config>

Regards,

Tom

Was it helpful?

Solution

Solr 4.1 keeps stored fields in compressed form, which may explain the reduction in index size.

Also,

<copyField source="content" dest="contentS" />

Documentation @ http://wiki.apache.org/solr/SchemaXml#Copy_Fields

The copy is done at the stream source level, and no copy feeds into another copy.

Using a field that is itself a copyField destination as the source of another copyField does not work.
The copyField source must be an actual field that receives a value directly; copies do not cascade.

You can also check http://lucene.472066.n3.nabble.com/does-copyField-recurse-td2450208.html

Does this work for you?

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top