문제

I just implemented Apache Solr search in our e-commerce application. I need to fine-tune the search results for a better user experience, and I need guidance since I am new to Solr.

What I need is more relevant (or exact) results. For example, if the user types ‘pen’, it should list:

  • pens
  • pen stands
  • pen drives (also fine but pens should be high priority)

But it is displaying:

  • pens
  • pen stands
  • pen drives
  • dispensers

The first three are OK, but in "dispensers" the word "pen" only appears in the middle of the word (dis-pen-sers), so dispensers should not be listed along with the pen results. How can I achieve this?

Update-1:

schema.xml
<types>
        <!-- Field type definitions referenced by the type attribute of the field
             and dynamicField entries in this schema. -->
        <!-- Plain (non-analyzed) string type. -->
        <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true" />
        <!-- boolean type: "true" or "false" -->
        <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true" />
        <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings.
             NOTE(review): this element is spelled "fieldtype" (lower-case t) while every other
             definition uses "fieldType"; XML names are case-sensitive, so confirm the Solr
             version in use accepts both spellings. -->
        <fieldtype name="binary" class="solr.BinaryField" />
        <!-- Trie numeric types declared with precisionStep="0". -->
        <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0" />
        <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0" />
        <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0" />
        <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0" />
        <!-- The same Trie numeric classes declared with precisionStep="8"
             (presumably intended for range queries; verify against the Solr docs). -->
        <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0" />
        <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0" />
        <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0" />
        <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0" />
        <!-- Trie date type declared with precisionStep="0". -->
        <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0" />
        <!-- A Trie based date field for faster date range queries and date faceting. -->
        <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0" />
        <!-- Non-Trie numeric and date types (p* prefix). -->
        <fieldType name="pint" class="solr.IntField" omitNorms="true" />
        <fieldType name="plong" class="solr.LongField" omitNorms="true" />
        <fieldType name="pfloat" class="solr.FloatField" omitNorms="true" />
        <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true" />
        <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true" />
        <!-- Sortable numeric types (s* prefix), all with sortMissingLast="true". -->
        <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true" />
        <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true" />
        <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true" />
        <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true" />
        <!-- Random sort field type. -->
        <fieldType name="random" class="solr.RandomSortField" indexed="true" />
        <!-- A text field that only splits on whitespace for exact matching of words;
             tokens are lower-cased, and the same analyzer is used at index and query time. -->
        <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory" />
                <filter class="solr.LowerCaseFilterFactory" />
            </analyzer>
        </fieldType>
        <!-- Text keyword: keyword tokenizer only, with no filters (so no lower-casing). -->
        <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.KeywordTokenizerFactory" />
            </analyzer>
        </fieldType>
        <!-- Text path: path-hierarchy tokenizer configured with a backslash delimiter
             and a forward slash as the replacement character. -->
        <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="\" replace="/" />
            </analyzer>
        </fieldType>
        <!-- FOR AUTO SUGGESTION FIELD.
             NOTE(review): despite the name "edgytext", the index analyzer uses
             NGramTokenizerFactory (not an edge n-gram), with gram sizes 1 to 50. As the
             accepted answer on this page explains, letter-level n-grams are taken from
             inside the text as well as from its start, so a query such as "pen" also
             matches "dispensers". -->
        <fieldType name="edgytext" class="solr.TextField" positionIncrementGap="100">
            <!-- Index time: n-gram the input, then apply synonyms, stop words, lower-casing
                 and duplicate removal, in that order. -->
            <analyzer type="index">
                <tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="50"/>
                <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false" />
                <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
                <filter class="solr.LowerCaseFilterFactory" />
                <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
            </analyzer>
            <!-- Query time: keyword tokenizer (no n-gramming), followed by the same synonym,
                 stop-word and lower-case filters as the index analyzer. -->
            <analyzer type="query">
                <tokenizer class="solr.KeywordTokenizerFactory" />
                <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false" />
                <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
                <filter class="solr.LowerCaseFilterFactory" />
            </analyzer>
        </fieldType>
        <!-- Text Shingle: whitespace tokens combined into shingles, with single tokens kept
             (outputUnigrams="true"), then lower-cased.
             NOTE(review): the accepted answer on this page warns that maxShingleSize="7" is
             high and will noticeably increase index size. -->
        <fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory" />
                <filter class="solr.ShingleFilterFactory" maxShingleSize="7" outputUnigrams="true"/>
                <filter class="solr.LowerCaseFilterFactory" />
            </analyzer>
        </fieldType>
    </types>
    <!-- Fields definition -->
        <fields>
            <!-- Identifier fields.
                 NOTE(review): unique_id declares no stored attribute, unlike the other
                 fields here; confirm that relying on the default is intended. -->
            <field name="unique_id" type="string" indexed="true" required="true" />
            <field name="products_id" type="string" indexed="true" stored="true" required="true" />
            <field name="sku" type="text_keyword" indexed="true" stored="true" omitNorms="true" />
            <!-- Integer attributes for category, store, website and product status. -->
            <field name="category_id" type="int" indexed="true" stored="true" multiValued="true" />
            <field name="store_id" type="int" indexed="true" stored="true" />
            <field name="website_id" type="int" indexed="true" stored="true" />
            <field name="product_status" type="int" indexed="true" stored="true" />
            <field name="category_path" type="text_path" indexed="true" stored="true" multiValued="true" omitNorms="true" />
            <!-- Text fields. textSpell and textSearchText use the whitespace-tokenized text_ws
                 type. textSearch uses edgytext, whose index analyzer n-grams the text, so
                 queries against textSearch can match in the middle of a word (the
                 "dispensers" result described in the question). -->
            <field name="textSpell" type="text_ws" stored="true" indexed="true" multiValued="true" omitNorms="true" />
            <field name="textSearch" type="edgytext" indexed="true" stored="true" multiValued="true" omitNorms="true" omitTermFreqAndPositions="true" />
            <field name="textSearchText" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" omitTermFreqAndPositions="true" />
            <field name="_version_" type="long" indexed="true" stored="true" />
            <!-- Dynamic fields: the field-name suffix selects the type. The *_text and
                 *_boost patterns use the n-grammed edgytext type; *_boost_exact uses the
                 whitespace-tokenized text_ws type. -->
            <dynamicField name="*_int" type="int" indexed="true" stored="true" />
            <dynamicField name="*_varchar" type="text_keyword" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true" />
            <dynamicField name="*_text" type="edgytext" indexed="true" stored="true" multiValued="true" omitNorms="true" omitTermFreqAndPositions="true" />
            <dynamicField name="*_decimal" type="float" indexed="true" stored="true" />
            <dynamicField name="*_datetime" type="date" indexed="true" stored="true" />
            <dynamicField name="*_static" type="string" indexed="true" stored="true" />
            <dynamicField name="*_boost" type="edgytext" indexed="true" stored="true" multiValued="true" />
            <dynamicField name="*_boost_exact" type="text_ws" indexed="true" stored="true" multiValued="true" />
            <dynamicField name="*_facet" type="text_keyword" indexed="true" stored="true" multiValued="true" omitNorms="true" />
        </fields>

Update-2:

solrconfig.xml

도움이 되었습니까?

해결책

Your textSearch field of type edgytext is based on the letter-level ngrams, i.e.

In: "bicycle" (tokenized with minGramSize="4" and maxGramSize="5")

Out: "bicy", "bicyc", "icyc", "icycl", "cycl", "cycle", "ycle"

example taken from: https://cwiki.apache.org/confluence/display/solr/Tokenizers#Tokenizers-N-GramTokenizer

This produces matches inside a token (intra-word matches), which is what you are seeing: "pen" is one of the n-grams generated from "dispensers". If you would like token-level n-grams instead, define your search field based on text_shingle. By the way, be careful with maxShingleSize: a value of 7 seems a bit too high and will significantly increase the index size.

다른 팁

I would suggest doing the following for best results:

  1. Copy the text_general fieldType from this (the linked example Solr schema) and place it inside the <types> tag of your schema.xml

  2. Change the textSearch field definition in your current schema.xml to <field name="textSearch" type="text_general" indexed="true" stored="true" multiValued="true" omitNorms="true" omitTermFreqAndPositions="true" />

  3. Restart the Solr server and then re-index the data.

  4. Sample search query - http://solr-server:8983/solr/english/select?q=pen&defType=edismax&qf=textSearch
라이센스 : CC-BY-SA ~와 함께 속성
제휴하지 않습니다 StackOverflow
scroll top