欢迎投稿

今日深度:

solr配置,

solr配置,


<dataConfig>
    <!-- Create a dynamic attribute specific transformer  -->
    <script><![CDATA[

        /*
         * Note: all values must be checked for null! Even the "What id"
         * queries seem to be sent through these functions, so there will be
         * calls with every field other than "id" set to null.
         */

        /**
         * Script transformer referenced by the premium_user_profile entity.
         * It is a pass-through: the row is handed back without modification.
         *
         * @param row the row object supplied to the transformer
         * @return the very same row object
         */
        function createPremiumProfile(row) {
            var untouchedRow = row;
            return untouchedRow;
        }

        /**
         * Script transformer referenced by the premium_user_category entity.
         * Writes an 'enabled' flag into the row: true when the row carries a
         * 'category' value, false when that value is null.
         *
         * @param row map-like row exposing get(key) and put(key, value)
         * @return the same row, with 'enabled' set
         */
        function categoryTransformer(row) {
            // Loose comparison on purpose: undefined counts as "missing" too.
            var hasCategory = row.get('category') != null;
            row.put('enabled', hasCategory);
            return row;
        }

        /**
         * Script transformer referenced by the user entity.
         * Writes a 'platform_enabled' flag into the row: false when
         * 'platform_id' equals 14, true for every other value.
         *
         * NOTE(review): a null platform_id does not equal 14, so such a row is
         * flagged as platform_enabled = true. Confirm that this is intended.
         *
         * @param row map-like row exposing get(key) and put(key, value)
         * @return the same row, with 'platform_enabled' set
         */
        function userTransformer(row) {
            // Loose comparison on purpose, so numeric-like values still match 14.
            var onEnabledPlatform = row.get('platform_id') != 14;
            row.put('platform_enabled', onEnabledPlatform);
            return row;
        }

    ]]></script>
    <document name="profiles">
        <!--
        Import strategy:
            1. Full import ("query"): load every row of premium_user_profile.
            2. Delta import:
            2.1 "deltaQuery" collects the ids of rows modified since the last index
                time minus 90 seconds; "deltaImportQuery" reloads each of those rows.
            2.2 "postImportDeleteQuery" then removes every document that is not
                flagged platform_enabled:true or not flagged enabled:true.
            "preImportDeleteQuery" uses the same delete query before an import.
            NOTE(review): no deletedPkQuery is configured on this entity. Confirm
            whether rows deleted from the database must also leave the index.
        -->
        <entity name="premium_user_profile"
                pk="id"
                dataSource="datasource"
                transformer="script:createPremiumProfile"
                query="select * from premium_user_profile"
                deltaQuery="select id from premium_user_profile where LAST_MODIFIED_ON > date_sub('${dataimporter.last_index_time}', INTERVAL 90 SECOND)"
                deltaImportQuery="select * from premium_user_profile where id ='${dataimporter.delta.id}'"
                preImportDeleteQuery="(*:* NOT platform_enabled:true) OR (*:* NOT enabled:true)"
                postImportDeleteQuery="(*:* NOT platform_enabled:true) OR (*:* NOT enabled:true)">

            <field column="id" name="id"/>
            <field column="LAST_MODIFIED_ON" name="modified_on"/>

            <!-- Categories of the profile's user; categoryTransformer adds the "enabled" flag to each row. -->
            <entity name="premium_user_category"
                    dataSource="datasource"
                    transformer="script:categoryTransformer"
                    query="select category from premium_user_category where user_id='${premium_user_profile.user_id}'"/>

            <!-- Platform of the profile's user; userTransformer adds the "platform_enabled" flag to each row. -->
            <entity name="user"
                    dataSource="datasource"
                    transformer="script:userTransformer"
                    query="select platform_id from user where id='${premium_user_profile.user_id}'"/>

        </entity>
    </document>
</dataConfig>

schema.xml

<?xml version="1.0" encoding="UTF-8" ?>

<schema name="kijijiit_public_premium_user" version="1.5">

    <types>
        <!-- field type definitions. The "name" attribute is
           just a label to be used by field definitions.  The "class"
           attribute and any other attributes determine the real
           behavior of the fieldType.
           Class names starting with "solr" refer to java classes in the
           org.apache.solr.analysis package.
        -->

        <!-- The StrField type is not analyzed, but indexed/stored verbatim.
           - StrField and TextField support an optional compressThreshold which
           limits compression (if enabled in the derived fields) to values which
           exceed a certain size (in characters).
        -->
        <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>

        <!-- Boolean type: "true" or "false". -->
        <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>

        <!-- Binary data type. Values are sent and retrieved as Base64-encoded strings. -->
        <fieldType name="binary" class="solr.BinaryField"/>

        <!-- The optional sortMissingLast and sortMissingFirst attributes are
             currently supported on types that are sorted internally as strings.
               This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
           - If sortMissingLast="true", then a sort on this field will cause documents
             without the field to come after documents with the field,
             regardless of the requested sort order (asc or desc).
           - If sortMissingFirst="true", then a sort on this field will cause documents
             without the field to come before documents with the field,
             regardless of the requested sort order.
           - If sortMissingLast="false" and sortMissingFirst="false" (the default),
             then default lucene sorting will be used which places docs without the
             field first in an ascending sort and last in a descending sort.
        -->

        <!--
          Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
        -->
        <!-- Plain numeric types: precisionStep="0" indexes each value at a single precision level. -->
        <fieldType name="int" class="solr.TrieIntField"
                   precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="float" class="solr.TrieFloatField"
                   precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="long" class="solr.TrieLongField"
                   precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="double" class="solr.TrieDoubleField"
                   precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

        <!--
         Range-query friendly numeric types. Each value is indexed at several
         levels of precision, which speeds up range queries when many values lie
         between the range endpoints. See the javadoc for NumericRangeQuery for
         internal implementation details.

         A smaller precisionStep (specified in bits) produces more tokens per
         value, a slightly larger index, and faster range queries.
         A precisionStep of 0 disables indexing at different precision levels.
        -->
        <fieldType name="tint" class="solr.TrieIntField"
                   precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="tfloat" class="solr.TrieFloatField"
                   precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="tlong" class="solr.TrieLongField"
                   precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="tdouble" class="solr.TrieDoubleField"
                   precisionStep="8" omitNorms="true" positionIncrementGap="0"/>

        <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
             is a more restricted form of the canonical representation of dateTime
             http://www.w3.org/TR/xmlschema-2/#dateTime
             The trailing "Z" designates UTC time and is mandatory.
             Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
             All other components are mandatory.

             Expressions can also be used to denote calculations that should be
             performed relative to "NOW" to determine the value, ie...

                   NOW/HOUR
                      ... Round to the start of the current hour
                   NOW-1DAY
                      ... Exactly 1 day prior to now
                   NOW/DAY+6MONTHS+3DAYS
                      ... 6 months and 3 days in the future from the start of
                          the current day

             Consult the DateField javadocs for more information.

             Note: For faster range queries, consider the tdate type
          -->
        <!-- Plain date type (precisionStep="0"). -->
        <fieldType name="date" class="solr.TrieDateField"
                   omitNorms="true" precisionStep="0" positionIncrementGap="0"/>

        <!-- Trie-based date type (precisionStep="6") for faster date range queries and date faceting. -->
        <fieldType name="tdate" class="solr.TrieDateField"
                   omitNorms="true" precisionStep="6" positionIncrementGap="0"/>

        <!-- A text field for italian language -->
        <fieldType name="text_italian" class="solr.TextField" positionIncrementGap="100">
            <!-- Index time: strip HTML/XML markup, split on whitespace, then normalize the tokens. -->
            <analyzer type="index">
                <!-- Char filters work on the raw text ahead of the tokenizer, so they are declared first. -->
                <charFilter class="solr.HTMLStripCharFilterFactory"/>
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.TrimFilterFactory"/>
                <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" enablePositionIncrements="true"/>
                <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
                <filter class="solr.WordDelimiterFilterFactory" catenateWords="1"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.ItalianLightStemFilterFactory"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
                <filter class="solr.ASCIIFoldingFilterFactory"/>  <!-- filters diacritics -->
            </analyzer>
            <!-- Query time: no HTML stripping, no synonym expansion, and no catenateWords on the word delimiter. -->
            <analyzer type="query">
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.TrimFilterFactory"/>
                <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" enablePositionIncrements="true"/>
                <filter class="solr.WordDelimiterFilterFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.ItalianLightStemFilterFactory"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
                <filter class="solr.ASCIIFoldingFilterFactory"/>  <!-- filters diacritics -->
            </analyzer>
        </fieldType>

        <!-- Field type for suggestions. this one should not be stemmed!!!! -->
        <fieldType name="text_suggestions" class="solr.TextField" positionIncrementGap="100">
            <!-- Index side: split on runs of punctuation/whitespace, trim, lowercase, then drop stop words. -->
            <analyzer type="index">
                <tokenizer class="solr.PatternTokenizerFactory" pattern="[\p{Punct}\p{Space}]+"/>
                <filter class="solr.TrimFilterFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" enablePositionIncrements="true"/>
            </analyzer>
            <!-- Query side: same tokenizer, trim and lowercase steps; no stop word filter. -->
            <analyzer type="query">
                <tokenizer class="solr.PatternTokenizerFactory" pattern="[\p{Punct}\p{Space}]+"/>
                <filter class="solr.TrimFilterFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
            </analyzer>
        </fieldType>


        <!-- lowercases the entire field value, keeping it as a single token.  -->
        <!--
        <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.KeywordTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory" />
            </analyzer>
        </fieldType>
        -->

        <!--  // -->
        <!--  some handy shortcut types ! -->
        <!--  // -->

        <fieldType name="random" class="solr.RandomSortField" indexed="true"/>

        <!-- Fields of this type are neither stored nor indexed, so any data added to them is ignored outright. -->
        <fieldType name="ignored" class="solr.StrField" stored="false" indexed="false" multiValued="false"/>

        <!-- String that is both indexed and stored. -->
        <fieldType name="string_indexed_and_stored" class="solr.StrField" stored="true" indexed="true"/>

        <!-- Unindexed string that is only stored. -->
        <fieldType name="string_stored_only" class="solr.StrField" stored="true" indexed="false"/>

        <!-- Comma-separated values, e.g. a location_path like "0,117,5111"; the pattern
             tokenizer splits on commas with optional surrounding whitespace.
             The type name keeps its historical spelling because fields may refer to it.
             TODO: does this need to be stored? -->
        <fieldType name="text_comma_seperated" class="solr.TextField" stored="true" indexed="true">
            <analyzer>
                <tokenizer class="solr.PatternTokenizerFactory" pattern="\s*,\s*"/>
            </analyzer>
        </fieldType>

    </types>

    <fields>
        <!-- Fields that come from premium_user_reply.
             NOTE(review): the import configuration reads premium_user_profile; confirm the source name. -->

        <!-- Identifiers -->
        <field name="id" type="long" required="true" multiValued="false" indexed="true" stored="true"/>
        <field name="user_id" type="long" required="true" multiValued="false" indexed="true" stored="true"/>

        <!-- Profile details: stored for display, not indexed -->
        <field name="business_name" type="string" required="false" multiValued="false" indexed="false" stored="true"/>
        <field name="business_description" type="string" required="false" multiValued="false" indexed="false" stored="true"/>
        <field name="phone_number" type="string" required="false" multiValued="false" indexed="false" stored="true"/>
        <field name="url" type="string" required="false" multiValued="false" indexed="false" stored="true"/>
        <field name="area_id" type="long" required="true" multiValued="false" indexed="false" stored="true"/>
        <field name="address" type="string" required="false" multiValued="false" indexed="false" stored="true"/>
        <field name="hourly_rate" type="long" required="false" multiValued="false" indexed="false" stored="true"/>
        <field name="lat" type="float" required="false" multiValued="false" indexed="false" stored="true"/>
        <field name="lng" type="float" required="false" multiValued="false" indexed="false" stored="true"/>
        <field name="img_hash" type="string" required="false" multiValued="false" indexed="false" stored="true"/>
        <field name="status" type="int" required="true" multiValued="false" indexed="false" stored="true"/>

        <!-- Dates, categories, platforms and the flags used by the import delete queries -->
        <field name="creation_date" type="tdate" required="true" multiValued="false" indexed="true" stored="true"/>
        <field name="modified_on" type="tdate" required="true" multiValued="false" indexed="true" stored="true"/>
        <field name="category" type="long" required="false" multiValued="true" indexed="true" stored="true"/>
        <field name="platform_id" type="long" required="false" multiValued="true" indexed="true" stored="true"/>
        <field name="enabled" type="boolean" required="false" multiValued="false" indexed="true" stored="true"/>
        <field name="platform_enabled" type="boolean" required="false" multiValued="false" indexed="true" stored="true"/>
        <field name="last_post_activated" type="tdate" required="true" multiValued="false" indexed="true" stored="true"/>

        <!-- Indexed-only Italian text field -->
        <field name="txt" type="text_italian" required="false" multiValued="true" indexed="true" stored="false"/>

        <!-- Here, default is used to create a "timestamp" field indicating when each document was indexed. -->
        <field name="timestamp" type="date" default="NOW" multiValued="false" indexed="true" stored="true"/>

    </fields>

    <!-- Field used to determine and enforce document uniqueness.
         Unless this field is marked with required="false", it will be a required field.
      -->
    <uniqueKey>id</uniqueKey>

    <!-- Field the QueryParser uses when a query gives no explicit field name. -->
    <defaultSearchField>txt</defaultSearchField>

    <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
    <solrQueryParser defaultOperator="AND"/>

    <!-- Copy the business name and description into "txt", the default search field. -->
    <copyField source="business_name" dest="txt"/>
    <copyField source="business_description" dest="txt"/>

</schema>

solrconfig.xml

<?xml version="1.0" encoding="UTF-8" ?>

<config>
    <!-- In all configuration below, a prefix of "solr." for class names
         is an alias that causes solr to search appropriate packages,
         including org.apache.solr.(search|update|request|core|analysis)

         You may also specify a fully qualified Java classname if you
         have your own custom plugins.
      -->

    <!-- Set this to 'false' if you want solr to continue working after
         it has encountered a severe configuration error.  In a
         production environment, you may want solr to keep working even
         if one handler is mis-configured.

         You may also set this to false by setting the system
         property:

           -Dsolr.abortOnConfigurationError=false

         NOTE(review): this option is believed to be obsolete in Solr 4.x;
         confirm against the deployed version before relying on it.
      -->
    <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>

    <!-- Controls which version of Lucene the various components of Solr
         adhere to.  Generally, you want to use the latest version to
         get all bug fixes and improvements. It is highly recommended
         that you fully re-index after changing this setting, as it can
         affect both how text is indexed and how it is queried.
      -->
    <luceneMatchVersion>4.10</luceneMatchVersion>
    <!-- Index data directory, taken from the solr.core.dataDir property (no default is given here). -->
    <dataDir>${solr.core.dataDir}</dataDir>


    <!-- The DirectoryFactory to use for indexes.

         solr.StandardDirectoryFactory, the default, is filesystem
         based and tries to pick the best implementation for the current
         JVM and platform.  One can force a particular implementation
         via solr.MMapDirectoryFactory, solr.NIOFSDirectoryFactory, or
         solr.SimpleFSDirectoryFactory.

         solr.RAMDirectoryFactory is memory based, not
         persistent, and doesn't work with replication.

         The class below is read from the solr.directoryFactory property and
         falls back to solr.StandardDirectoryFactory when it is not set.
      -->
    <directoryFactory name="DirectoryFactory"
                      class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>

    <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         Index Config - These settings control low-level behavior of indexing
         Most example settings here show the default value, but are commented
         out, to more easily see where customizations have been made.

         Note: As of Solr 3.6, the <indexDefaults> and <mainIndex> sections
               are deprecated and not shown in the example config. They will
               still work, but will go away for good in 4.0
         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
    <indexConfig>
        <!-- maxFieldLength specifies max number of *tokens* indexed per field. Default: 10000 -->
        <!-- <maxFieldLength>10000</maxFieldLength>  -->
        <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
        <!-- <writeLockTimeout>1000</writeLockTimeout>  -->

        <!-- Expert: Enabling the compound file format uses fewer files for the
             index (and so fewer file descriptors) at the expense of performance.
             Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
        <useCompoundFile>false</useCompoundFile>

        <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
             indexing for buffering added documents and deletions before they are
             flushed to the Directory.
             maxBufferedDocs sets a limit on the number of documents buffered
             before flushing.
             If both ramBufferSizeMB and maxBufferedDocs are set, then
             Lucene will flush based on whichever limit is hit first.  -->
        <ramBufferSizeMB>64</ramBufferSizeMB>
        <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->

        <!-- Expert: Merge Policy
             The Merge Policy in Lucene controls how merging of segments is done.
             The default since Solr/Lucene 3.3 is TieredMergePolicy.
             The default since Lucene 2.3 was the LogByteSizeMergePolicy,
             Even older versions of Lucene used LogDocMergePolicy.
          -->
        <!--
            <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
              <int name="maxMergeAtOnce">10</int>
              <int name="segmentsPerTier">10</int>
            </mergePolicy>
          -->

        <!-- Merge Factor
             The merge factor controls how many segments will get merged at a time.
             For TieredMergePolicy, mergeFactor is a convenience parameter which
             will set both MaxMergeAtOnce and SegmentsPerTier at once.
             For LogByteSizeMergePolicy, mergeFactor decides how many new segments
             will be allowed before they are merged into one.
             Default is 10 for both merge policies.
          -->
        <!--
        <mergeFactor>10</mergeFactor>
          -->

        <!-- Expert: Merge Scheduler
             The Merge Scheduler in Lucene controls how merges are
             performed.  The ConcurrentMergeScheduler (Lucene 2.3 default)
             can perform merges in the background using separate threads.
             The SerialMergeScheduler (Lucene 2.2 default) does not.
         -->
        <!--
           <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
           -->

        <!-- LockFactory

             This option specifies which Lucene LockFactory implementation
             to use.

             single = SingleInstanceLockFactory - suggested for a
                      read-only index or when there is no possibility of
                      another process trying to modify the index.
             native = NativeFSLockFactory - uses OS native file locking.
                      Do not use when multiple solr webapps in the same
                      JVM are attempting to share a single index.
             simple = SimpleFSLockFactory  - uses a plain file for locking

             Defaults: 'native' is default for Solr3.6 and later, otherwise
                       'simple' is the default

             More details on the nuances of each LockFactory...
             http://wiki.apache.org/lucene-java/AvailableLockFactories
        -->
        <!-- <lockType>native</lockType> -->

        <!-- Unlock On Startup

             If true, unlock any held write or commit locks on startup.
             This defeats the locking mechanism that allows multiple
             processes to safely access a lucene index, and should be used
             with care. Default is "false".

             This is not needed if lock type is 'none' or 'single'
         -->
        <!--
        <unlockOnStartup>false</unlockOnStartup>
          -->

        <!-- Expert: Controls how often Lucene loads terms into memory
             Default is 128 and is likely good for most everyone.
          -->
        <!-- <termIndexInterval>128</termIndexInterval> -->

        <!-- If true, IndexReaders will be reopened (often more efficient)
             instead of closed and then opened. Default: true
          -->
        <!--
        <reopenReaders>true</reopenReaders>
          -->

        <!-- Commit Deletion Policy

             Custom deletion policies can be specified here. The class must
             implement org.apache.lucene.index.IndexDeletionPolicy.

             http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html

             The default Solr IndexDeletionPolicy implementation supports
             deleting index commit points on number of commits, age of
             commit point and optimized status.

             The latest commit point should always be preserved regardless
             of the criteria.
        -->
        <!--
        <deletionPolicy class="solr.SolrDeletionPolicy">
        -->
        <!-- The number of commit points to be kept -->
        <!-- <str name="maxCommitsToKeep">1</str> -->
        <!-- The number of optimized commit points to be kept -->
        <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
        <!--
            Delete all commit points once they have reached the given age.
            Supports DateMathParser syntax e.g.
          -->
        <!--
           <str name="maxCommitAge">30MINUTES</str>
           <str name="maxCommitAge">1DAY</str>
        -->
        <!--
        </deletionPolicy>
        -->

        <!-- Lucene Infostream

             To aid in advanced debugging, Lucene provides an "InfoStream"
             of detailed information when indexing.

             Setting the value to true will instruct the underlying Lucene
             IndexWriter to write its debugging info to the specified file
          -->
        <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> -->
    </indexConfig>


    <!-- JMX

         This example enables JMX if and only if an existing MBeanServer
         is found. Use this if you want to configure JMX through JVM
         parameters. Remove this to disable exposing Solr configuration
         and statistics to JMX.

         For more details see http://wiki.apache.org/solr/SolrJmx
      -->
    <jmx />
    <!-- If you want to connect to a particular server, specify the
         agentId
      -->
    <!-- <jmx agentId="myAgent" /> -->
    <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
    <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
      -->

    <!-- The default high-performance update handler -->
    <updateHandler class="solr.DirectUpdateHandler2">

        <!-- AutoCommit

             Perform a <commit/> automatically under certain conditions.
             Instead of enabling autoCommit, consider using "commitWithin"
             when adding documents.

             http://wiki.apache.org/solr/UpdateXmlMessages

             maxDocs - Maximum number of documents to add since the last
                       commit before automatically triggering a new commit.

             maxTime - Maximum amount of time that is allowed to pass
                       since a document was added before automatically
                       triggering a new commit.

             autoCommit is left disabled here (the example below is commented out).
          -->
        <!--
           <autoCommit>
             <maxDocs>10000</maxDocs>
             <maxTime>1000</maxTime>
           </autoCommit>
          -->

        <!-- Update Related Event Listeners

             Various IndexWriter related events can trigger Listeners to
             take actions.

             postCommit - fired after every commit or optimize command
             postOptimize - fired after every optimize command
          -->
        <!-- The RunExecutableListener executes an external command from a
             hook such as postCommit or postOptimize.

             exe - the name of the executable to run
             dir - dir to use as the current working directory. (default=".")
             wait - the calling thread waits until the executable returns.
                    (default="true")
             args - the arguments to pass to the program.  (default is none)
             env - environment variables to set.  (default is none)
          -->
        <!-- This example shows how RunExecutableListener could be used
             with the script based replication...
             http://wiki.apache.org/solr/CollectionDistribution
          -->
        <!--
           <listener event="postCommit" class="solr.RunExecutableListener">
             <str name="exe">solr/bin/snapshooter</str>
             <str name="dir">.</str>
             <bool name="wait">true</bool>
             <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
             <arr name="env"> <str>MYVAR=val1</str> </arr>
           </listener>
          -->
    </updateHandler>

    <!-- IndexReaderFactory

         Use the following format to specify a custom IndexReaderFactory,
         which allows for alternate IndexReader implementations.

         ** Experimental Feature **

         Please note - Using a custom IndexReaderFactory may prevent
         certain other features from working. The API to
         IndexReaderFactory may change without warning or may even be
         removed from future releases if the problems cannot be
         resolved.


         ** Features that may not work with custom IndexReaderFactory **

         The ReplicationHandler assumes a disk-resident index. Using a
         custom IndexReader implementation may cause incompatibility
         with ReplicationHandler and may cause replication to not work
         correctly. See SOLR-1366 for details.

      -->
    <!--
    <indexReaderFactory name="IndexReaderFactory" class="package.class">
      <str name="someArg">Some Value</str>
    </indexReaderFactory >
    -->
    <!-- By explicitly declaring the Factory, the termIndexDivisor can
         be specified.
      -->
    <!--
       <indexReaderFactory name="IndexReaderFactory"
                           class="solr.StandardIndexReaderFactory">
         <int name="setTermIndexDivisor">12</int>
       </indexReaderFactory >
      -->

    <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         Query section - these settings control query time things like caches
         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
    <query>
        <!-- Max Boolean Clauses

             Maximum number of clauses in each BooleanQuery; an exception
             is thrown if it is exceeded.

             ** WARNING **

             This option actually modifies a global Lucene property that
             will affect all SolrCores.  If multiple solrconfig.xml files
             disagree on this property, the value at any given moment will
             be based on the last SolrCore to be initialized.

          -->
        <maxBooleanClauses>1024</maxBooleanClauses>


        <!-- Solr Internal Query Caches

             There are two implementations of cache available for Solr,
             LRUCache, based on a synchronized LinkedHashMap, and
             FastLRUCache, based on a ConcurrentHashMap.

             FastLRUCache has faster gets and slower puts in single
             threaded operation and thus is generally faster than LRUCache
             when the hit ratio of the cache is high (> 75%), and may be
             faster under other scenarios on multi-cpu systems.
        -->

        <!-- Filter Cache

             Cache used by SolrIndexSearcher for filters (DocSets),
             unordered sets of *all* documents that match a query.  When a
             new searcher is opened, its caches may be prepopulated or
             "autowarmed" using data from caches in the old searcher.
             autowarmCount is the number of items to prepopulate.  For
             LRUCache, the autowarmed items will be the most recently
             accessed items.

             Parameters:
               class - the SolrCache implementation to use
                   (LRUCache or FastLRUCache)
               size - the maximum number of entries in the cache
               initialSize - the initial capacity (number of entries) of
                   the cache.  (see java.util.HashMap)
               autowarmCount - the number of entries to prepopulate from
                   an old cache.
          -->
        <filterCache class="solr.FastLRUCache"
                     size="5000"
                     initialSize="4000"
                     autowarmCount="2000"/>

        <!-- Query Result Cache

             Caches results of searches - ordered lists of document ids
             (DocList) based on a query, a sort, and the range of documents
             requested.
          -->
        <queryResultCache class="solr.LRUCache"
                          size="4096"
                          initialSize="2048"
                          autowarmCount="2048"/>

        <!-- Document Cache

             Caches Lucene Document objects (the stored fields for each
             document).  Since Lucene internal document ids are transient,
             this cache will not be autowarmed.
          -->
        <documentCache class="solr.LRUCache"
                       size="20000"
                       initialSize="10000"
                       autowarmCount="0"/>

        <!-- Field Value Cache

             Cache used to hold field values that are quickly accessible
             by document id.  The fieldValueCache is created by default
             even if not configured here.
          -->
        <!-- Explicitly configured field value cache: 5 entries, no autowarming. -->
        <fieldValueCache class="solr.FastLRUCache" size="5" autowarmCount="0" showItems="1"/>

        <!-- Custom Cache

             Example of a generic cache.  These caches may be accessed by
             name through SolrIndexSearcher.getCache(),cacheLookup(), and
             cacheInsert().  The purpose is to enable easy caching of
             user/application level data.  The regenerator argument should
             be specified as an implementation of solr.CacheRegenerator
             if autowarming is desired.
          -->
        <!--
           <cache name="myUserCache"
                  class="solr.LRUCache"
                  size="4096"
                  initialSize="1024"
                  autowarmCount="1024"
                  regenerator="com.mycompany.MyRegenerator"
                  />
          -->


        <!-- Lazy Field Loading

             If true, stored fields that are not requested will be loaded
             lazily.  This can result in a significant speed improvement
             if the usual case is to not load all stored fields,
             especially if the skipped fields are large compressed text
             fields.
        -->
        <!-- Enabled: stored fields that are not requested are loaded lazily. -->
        <enableLazyFieldLoading>true</enableLazyFieldLoading>

        <!-- Use Filter For Sorted Query

             A possible optimization that attempts to use a filter to
             satisfy a search.  If the requested sort does not include
             score, then the filterCache will be checked for a filter
             matching the query. If found, the filter will be used as the
             source of document ids, and then the sort will be applied to
             that.

             For most situations, this will not be useful unless you
             frequently get the same search repeatedly with different sort
             options, and none of them ever use "score"
          -->
        <!--
           <useFilterForSortedQuery>true</useFilterForSortedQuery>
          -->

        <!-- Result Window Size

             An optimization for use with the queryResultCache.  When a search
             is requested, a superset of the requested number of document ids
             are collected.  For example, if a search for a particular query
             requests matching documents 10 through 19, and queryWindowSize
             is 50, then documents 0 through 49 will be collected and cached.
             Any further requests in that range can be satisfied via the cache.
          -->
        <!-- With a window of 50, a request for documents 10 through 19
             collects and caches documents 0 through 49. -->
        <queryResultWindowSize>50</queryResultWindowSize>

        <!-- Maximum number of documents to cache for any entry in the
             queryResultCache.
          -->
        <!-- At most 200 documents are cached for any single
             queryResultCache entry. -->
        <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

        <!-- Query Related Event Listeners

             Various IndexSearcher related events can trigger Listeners to
             take actions.

             newSearcher - fired whenever a new searcher is being prepared
             and there is a current searcher handling requests (aka
             registered).  It can be used to prime certain caches to
             prevent long request times for certain requests.

             firstSearcher - fired whenever a new searcher is being
             prepared but there is no current registered searcher to handle
             requests or to gain autowarming data from.


          -->
        <!-- QuerySenderListener takes an array of NamedList and executes a
             local query request for each NamedList in sequence.
          -->
        <!-- newSearcher warming. Every query below is commented out, so the
             "queries" array is empty as configured. -->
        <listener class="solr.QuerySenderListener" event="newSearcher">
            <arr name="queries">
                <!-- Example warming queries (disabled):
                   <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
                   <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
                  -->
            </arr>
        </listener>
        <!-- firstSearcher warming. The only query below is commented out, so
             the "queries" array is empty as configured. -->
        <listener class="solr.QuerySenderListener" event="firstSearcher">
            <arr name="queries">
                <!-- Example warming query (disabled):
                <lst>
                    <str name="q">static firstSearcher warming in solrconfig.xml</str>
                </lst>
                -->
            </arr>
        </listener>

        <!-- Use Cold Searcher

             If a search request comes in and there is no current
             registered searcher, then immediately register the still
             warming searcher and use it.  If "false" then all requests
             will block until the first searcher is done warming.
          -->
        <!-- false: requests block until the first searcher has finished
             warming. -->
        <useColdSearcher>false</useColdSearcher>

        <!-- Max Warming Searchers

             Maximum number of searchers that may be warming in the
             background concurrently.  An error is returned if this limit
             is exceeded.

             Recommend values of 1-2 for read-only slaves, higher for
             masters w/o cache warming.
          -->
        <!-- At most 2 searchers may warm in the background at once; an error
             is returned if this limit is exceeded. -->
        <maxWarmingSearchers>2</maxWarmingSearchers>

    </query>

    <!-- Request Dispatcher

         This section contains instructions for how the SolrDispatchFilter
         should behave when processing requests for this SolrCore.

         If you wish to regain use of /select?qt=... style request handler
         dispatching, then first add handleSelect="true" to
         <requestDispatcher>. Then change the name of the request handler
         named "/select" to something else without a leading "/", such as
         simply "select" and add default="true" to it.
     -->
    <requestDispatcher>
        <!-- Request Parsing

             These settings indicate how Solr Requests may be parsed, and
             what restrictions may be placed on the ContentStreams from
             those requests.

             enableRemoteStreaming - enables use of the stream.file
             and stream.url parameters for specifying remote streams.
             SearchRequestHandler won't fetch it, but some others do.

             multipartUploadLimitInKB - specifies the max size of
             Multipart File Uploads that Solr will allow in a Request.

             *** WARNING ***
             The settings below authorize Solr to fetch remote files. You
             should make sure your system has some authentication before
             using enableRemoteStreaming="true".

             NOTE(review): remote streaming is enabled here and the upload
             limit is 2048000 KB. Confirm that this instance is not reachable
             by untrusted clients, or set enableRemoteStreaming="false".
          -->
        <requestParsers enableRemoteStreaming="true"
                        multipartUploadLimitInKB="2048000" />

        <!-- HTTP Caching

             Set HTTP caching related parameters (for proxy caches and clients).

             The options below instruct Solr not to output any HTTP Caching
             related headers

        <httpCaching never304="true" /> -->
        <!-- If you include a <cacheControl> directive, it will be used to
             generate a Cache-Control header (as well as an Expires header
             if the value contains "max-age=")

             By default, no Cache-Control header is generated.

             You can use the <cacheControl> option even if you have set
             never304="true"
          -->
        <!--
           <httpCaching never304="true" >
             <cacheControl>max-age=30, public</cacheControl>
           </httpCaching>
          -->
        <!-- To enable Solr to respond with automatically generated HTTP
             Caching headers, and to respond to Cache Validation requests
             correctly, set the value of never304="false"

             This will cause Solr to generate Last-Modified and ETag
             headers based on the properties of the Index.

             The following options can also be specified to affect the
             values of these headers...

             lastModFrom - the default value is "openTime" which means the
             Last-Modified value (and validation against If-Modified-Since
             requests) will all be relative to when the current Searcher
             was opened.  You can change it to lastModFrom="dirLastMod" if
             you want the value to exactly correspond to when the physical
             index was last modified.

             etagSeed="..." is an option you can change to force the ETag
             header (and validation against If-None-Match requests) to be
             different even if the index has not changed (ie: when making
             significant changes to your config file)

             (lastModFrom and etagSeed are both ignored if you use
             the never304="true" option)
          -->

        <!-- The attribute is named lastModFrom, as documented above; the
             previous spelling "lastModifiedFrom" did not match it. The
             value "openTime" is also the documented default.
          -->
        <httpCaching lastModFrom="openTime"
                     etagSeed="Solr">
            <cacheControl>max-age=30, public</cacheControl>
        </httpCaching>
    </requestDispatcher>

    <!-- Request Handlers

         http://wiki.apache.org/solr/SolrRequestHandler

         Incoming queries will be dispatched to the correct handler
         based on the matching request path piece.

         If a Request Handler is declared with startup="lazy", then it will
         not be initialized until the first request that uses it.

      -->
    <!-- SearchHandler

         http://wiki.apache.org/solr/SearchHandler

         For processing Search Queries, the primary Request Handler
         provided with Solr is "SearchHandler". It delegates to a sequence
         of SearchComponents (see below) and supports distributed
         queries across multiple shards
      -->
    <requestHandler class="solr.SearchHandler" name="/select">
        <!-- "defaults": fallback values for query parameters. A value
             supplied in the request overrides the one given here.
          -->
        <lst name="defaults">
            <str name="defType">edismax</str>
            <str name="df">txt</str>
            <str name="q.op">OR</str>
            <int name="rows">10</int>
            <str name="echoParams">explicit</str>
        </lst>
        <!-- "appends": values added to the list of multi-valued params
             from the request (or from "defaults").

             In the disabled example below, "fq=inStock:true" would be
             appended to any fq params the user specifies. This partitions
             the index independently of any user-selected filtering (for
             instance filtering that results from faceted searching).

             NOTE: a client cannot prevent "appends" values from being
             used, so only use this mechanism if you always want it.
          -->
        <!--
           <lst name="appends">
             <str name="fq">inStock:true</str>
           </lst>
          -->
        <!-- "invariants": lets the Solr maintainer lock down the options
             available to clients. Values given here are used regardless of
             what the query, the "defaults" or the "appends" specify.

             In the disabled example below, facet.field and facet.query are
             fixed, limiting the facets clients can use. Faceting is not on
             by default, but if a client sends facet=true these are the
             only facets it gets counts for, whatever other facet.field or
             facet.query params it sends.

             NOTE: a client cannot prevent "invariants" values from being
             used, so only use this mechanism if you always want it.
          -->
        <!--
           <lst name="invariants">
             <str name="facet.field">cat</str>
             <str name="facet.field">manu_exact</str>
             <str name="facet.query">price:[* TO 500]</str>
             <str name="facet.query">price:[500 TO *]</str>
           </lst>
          -->
        <!-- "components": if the default list of SearchComponents is not
             wanted, it can be replaced entirely, or components can be
             prepended or appended to it (see below).
          -->
        <!--
           <arr name="components">
             <str>nameOfCustomComponent1</str>
             <str>nameOfCustomComponent2</str>
           </arr>
          -->
    </requestHandler>

    <!-- Field Analysis Request Handler

         RequestHandler that provides much the same functionality as
         analysis.jsp. Provides the ability to specify multiple field
         types and field names in the same request and outputs
         index-time and query-time analysis for each of them.

         Request parameters are:
         analysis.fieldname - field name whose analyzers are to be used

         analysis.fieldtype - field type whose analyzers are to be used
         analysis.fieldvalue - text for index-time analysis
         q (or analysis.q) - text for query time analysis
         analysis.showmatch (true|false) - When set to true and when
             query analysis is performed, the produced tokens of the
             field value analysis will be marked as "matched" for every
             token that is produced by the query analysis
     -->
    <!-- startup="lazy": not initialized until the first request that uses it. -->
    <requestHandler name="/analysis/field" class="solr.FieldAnalysisRequestHandler" startup="lazy"/>


    <!-- Document Analysis Handler

         http://wiki.apache.org/solr/AnalysisRequestHandler

         An analysis handler that provides a breakdown of the analysis
         process of provided documents. This handler expects a (single)
         content stream with the following format:

         <docs>
           <doc>
             <field name="id">1</field>
             <field name="name">The Name</field>
             <field name="text">The Text Value</field>
           </doc>
           <doc>...</doc>
           <doc>...</doc>
           ...
         </docs>

      Note: Each document must contain a field which serves as the
      unique key. This key is used in the returned response to associate
      an analysis breakdown to the analyzed document.

      Like the FieldAnalysisRequestHandler, this handler also supports
      query analysis by sending either an "analysis.query" or "q"
      request parameter that holds the query text to be analyzed. It
      also supports the "analysis.showmatch" parameter which when set to
      true, all field tokens that match the query tokens will be marked
      as a "match".
    -->
    <!-- startup="lazy": not initialized until the first request that uses it. -->
    <requestHandler name="/analysis/document" class="solr.DocumentAnalysisRequestHandler" startup="lazy"/>

    <!-- Admin Handlers

         Admin Handlers - This will register all the standard admin
         RequestHandlers.
      -->
    <requestHandler class="solr.admin.AdminHandlers" name="/admin/"/>
    <!-- This single handler is equivalent to the following... -->
    <!--
       <requestHandler name="/admin/luke"       class="solr.admin.LukeRequestHandler" />
       <requestHandler name="/admin/system"     class="solr.admin.SystemInfoHandler" />
       <requestHandler name="/admin/plugins"    class="solr.admin.PluginInfoHandler" />
       <requestHandler name="/admin/threads"    class="solr.admin.ThreadDumpHandler" />
       <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
       <requestHandler name="/admin/file"       class="solr.admin.ShowFileRequestHandler" />
      -->
    <!-- If you wish to hide files under ${solr.home}/conf, explicitly
         register the ShowFileRequestHandler using:
      -->
    <!--
       <requestHandler name="/admin/file"
                       class="solr.admin.ShowFileRequestHandler" >
         <lst name="invariants">
           <str name="hidden">synonyms.txt</str>
           <str name="hidden">anotherfile.txt</str>
         </lst>
       </requestHandler>
      -->

    <!-- ping/healthcheck -->
    <requestHandler class="solr.PingRequestHandler" name="/admin/ping">
        <!-- Request parameters are echoed in full unless the request says otherwise. -->
        <lst name="defaults">
            <str name="echoParams">all</str>
        </lst>
        <!-- The ping query is fixed as an invariant, so a request cannot override it. -->
        <lst name="invariants">
            <str name="q">solrpingquery</str>
        </lst>
    </requestHandler>

    <!-- Echo the request contents back to the client -->
    <requestHandler class="solr.DumpRequestHandler" name="/debug/dump">
        <lst name="defaults">
            <str name="echoHandler">true</str>
            <str name="echoParams">explicit</str>
        </lst>
    </requestHandler>

    <!-- Data import handler. Enabled only when solr.master.enabled is true;
         the property falls back to false when it is not set. -->
    <requestHandler name="/dataimport"
                    class="org.apache.solr.handler.dataimport.DataImportHandler"
                    enable="${solr.master.enabled:false}">
        <lst name="defaults">
            <str name="config">./data-config.xml</str>
            <!-- JDBC connection settings. Host, user and password are read
                 from the solr.db.* properties, with the value after ':' used
                 as the fallback.
                 NOTE(review): the fallback credentials are stored in this
                 file; confirm they are placeholders rather than real secrets. -->
            <lst name="datasource">
                <str name="driver">com.mysql.jdbc.Driver</str>
                <str name="url">jdbc:mysql://${solr.db.host:mysqldb}/ean_9?zeroDateTimeBehavior=convertToNull</str>
                <str name="user">${solr.db.user:mysqluser}</str>
                <str name="password">${solr.db.password:mysqlpass}</str>
                <!-- A batchSize of -1 disables batching and streams rows
                     instead. This avoids out-of-memory problems but is a
                     little slower. -->
                <str name="batchSize">-1</str>
                <str name="readOnly">true</str>
                <str name="autoCommit">false</str>
            </lst>
        </lst>
    </requestHandler>

    <!-- Replication handler. The master and slave roles are switched on by
         the solr.master.enabled and solr.slave.enabled properties; each role
         falls back to disabled when its property is not set. -->
    <requestHandler name="/replication" class="solr.ReplicationHandler">
        <lst name="master">
            <!-- Falls back to false, consistent with the /dataimport handler
                 and the slave section, so an unset property no longer leaves
                 this placeholder without a value. -->
            <str name="enable">${solr.master.enabled:false}</str>
            <!-- Replicate after optimize, startup and commit. -->
            <str name="replicateAfter">optimize</str>
            <str name="replicateAfter">startup</str>
            <str name="replicateAfter">commit</str>
        </lst>
        <lst name="slave">
            <str name="enable">${solr.slave.enabled:false}</str>
            <!-- Fully qualified URL of the master's replication handler.
                 NOTE(review): solr.master.host and solr.master.port have no
                 fallback values; confirm they are defined on every node that
                 loads this config. -->
            <str name="masterUrl">http://${solr.master.host}:${solr.master.port}/solr/${solr.core.name}/replication</str>

            <!-- Interval at which the slave polls the master, in HH:mm:ss
                 format. If this is absent the slave does not poll
                 automatically, but a snappull can still be triggered from
                 the admin or the HTTP API. -->
            <str name="pollInterval">00:00:10</str>
        </lst>
    </requestHandler>


    <!-- Update Processors

         Chains of Update Processor Factories for dealing with Update
         Requests can be declared, and then used by name in Update
         Request Processors

         http://wiki.apache.org/solr/UpdateRequestProcessor

      -->
    <!-- Deduplication

         An example dedup update processor that creates the "id" field
         on the fly based on the hash code of some other fields.  This
         example has overwriteDupes set to false since we are using the
         id field as the signatureField and Solr will maintain
         uniqueness based on that anyway.

      -->
    <!--
       <updateRequestProcessorChain name="dedupe">
         <processor class="solr.processor.SignatureUpdateProcessorFactory">
           <bool name="enabled">true</bool>
           <str name="signatureField">id</str>
           <bool name="overwriteDupes">false</bool>
           <str name="fields">name,features,cat</str>
           <str name="signatureClass">solr.processor.Lookup3Signature</str>
         </processor>
         <processor class="solr.LogUpdateProcessorFactory" />
         <processor class="solr.RunUpdateProcessorFactory" />
       </updateRequestProcessorChain>
      -->

    <!--
       This example update chain identifies the language of the incoming
       documents using the langid contrib. The detected language is
       written to field language_s. No field name mapping is done.
       The fields used for detection are text, title, subject and description,
       making this example suitable for detecting languages from full-text
       rich documents injected via ExtractingRequestHandler.
       See more about langId at http://wiki.apache.org/solr/LanguageDetection
    -->
    <!--
     <updateRequestProcessorChain name="langid">
       <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
         <str name="langid.fl">text,title,subject,description</str>
         <str name="langid.langField">language_s</str>
         <str name="langid.fallback">en</str>
       </processor>
       <processor class="solr.LogUpdateProcessorFactory" />
       <processor class="solr.RunUpdateProcessorFactory" />
     </updateRequestProcessorChain>
    -->

    <!-- Response Writers

         http://wiki.apache.org/solr/QueryResponseWriter

         Request responses will be written using the writer specified by
         the 'wt' request parameter matching the name of a registered
         writer.

         The "default" writer is the default and will be used if 'wt' is
         not specified in the request.
      -->
    <!-- The following response writers are implicitly configured unless
         overridden...
      -->
    <!--
       <queryResponseWriter name="xml"
                            default="true"
                            class="solr.XMLResponseWriter" />
       <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
       <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
       <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
       <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
       <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
       <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
      -->


    <!--
       Custom response writers can be declared as needed...
      -->
    <queryResponseWriter name="velocity"
                         class="solr.VelocityResponseWriter"
                         startup="lazy"/>


    <!-- Query Parsers

         http://wiki.apache.org/solr/SolrQuerySyntax

         Multiple QParserPlugins can be registered by name, and then
         used in either the "defType" param for the QueryComponent (used
         by SearchHandler) or in LocalParams
      -->
    <!-- example of registering a query parser -->
    <!--
       <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
      -->

    <!-- Function Parsers

         http://wiki.apache.org/solr/FunctionQuery

         Multiple ValueSourceParsers can be registered by name, and then
         used as function names when using the "func" QParser.
      -->
    <!-- example of registering a custom function parser  -->
    <!--
       <valueSourceParser name="myfunc"
                          class="com.mycompany.MyValueSourceParser" />
      -->

    <!-- Legacy config for the admin interface -->
    <admin>
        <!-- Matches all documents; presumably the query pre-filled in the
             admin interface (confirm against the Solr version in use). -->
        <defaultQuery>*:*</defaultQuery>

        <!-- Configure a healthcheck file for servers behind a
             load balancer. Disabled as shipped; uncomment to enable.
          -->
        <!--
           <healthcheck type="file">server-enabled</healthcheck>
          -->
    </admin>

</config>

www.htsjk.Com true http://www.htsjk.com/solr/45969.html NewsArticle solr配置, dataConfig !-- Create a dynamic attribute specific transformer -- script![CDATA[ /* * Note: all objects must be checked for null!!! As even the What id * queries seem to be sent through this method, there will be calls to * th...
评论暂时关闭