Update configuration to use custom update processor chain instead of copyField to...
author Wayne Schneider <wayne@indexdata.com>
Wed, 29 Jun 2016 16:46:21 +0000 (11:46 -0500)
committer Wayne Schneider <wayne@indexdata.com>
Wed, 29 Jun 2016 16:46:21 +0000 (11:46 -0500)
conf/schema.xml
conf/solrconfig-master.xml

index 00e92a5..30f35c9 100644 (file)
 
     <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> 
 
-    <!-- custom field type to prevent accidentally indexing enormous
-         terms in the catchall text field -->
-    <fieldType name="text_trunc" class="solr.TextField" positionIncrementGap="100">
-      <analyzer type="index">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="stopwords.txt"
-                />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
-        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.SnowballPorterFilterFactory"
-                language="English" protected="protwords.txt"/>
-        <filter class="solr.LengthFilterFactory" min="1" max="255"/>
-      </analyzer>
-      <analyzer type="query">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="stopwords.txt"
-                />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
-        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
-      </analyzer>
-    </fieldType>
-
  </types>
 
  <fields>
    <field name="harvest-timestamp" type="date"   indexed="true" stored="true"/>
    <field name="harvest-date"      type="string" indexed="true" stored="true"/>
 
-   <!-- catchall fields, can be implemented via copyField further on in this schema -->
-   <field name="text" type="text_trunc" indexed="true" stored="false" multiValued="true"/>
-   <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
-   <field name="payloads" type="payloads" indexed="true" stored="true"/>
+   <!-- default search field, created by the cloneFields update
+        processor chain in solrconfig.xml -->
+   <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
 
    <!-- Dynamic field definitions -->
    <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
    <dynamicField name="*_path" type="text" indexed="true" stored="true" multiValued="true"/>
 
    <!-- catchall dynamic field -->
-   <dynamicField name="*" type="text" multiValued="true" />
+   <dynamicField name="*" type="text" multiValued="true"/>
  </fields>
 
  <uniqueKey>id</uniqueKey>
  <copyField source="subject" dest="subject_exact"/>
  <copyField source="medium"  dest="medium_exact"/>
  <copyField source="journal-title"   dest="journal-title_exact"/>
- <!-- catchall to index all fields -->
- <copyField source="*" dest="text"/> 
 
 </schema>
index 3b2d5ad..86e9e9e 100644 (file)
        <lst name="defaults">
          <str name="update.chain">dedupe</str>
        </lst>
-       -->
+    -->
+    <lst name="defaults">
+      <str name="update.chain">cloneFields</str>
+    </lst>
   </requestHandler>
 
   <!-- for back compat with clients using /update/json and /update/csv -->
       <str name="captureAttr">true</str>
       <str name="fmap.a">links</str>
       <str name="fmap.div">ignored_</str>
+      <str name="update.chain">cloneFields</str>
     </lst>
   </requestHandler>
 
       <processor class="solr.RunUpdateProcessorFactory" />
     </updateRequestProcessorChain>
   -->
+  <!-- Clone fields processor
+       Hooks in a CloneFieldUpdateProcessorFactory that copies every field (source fieldRegex ".*"),
+       except fields whose name ends in "_binary", into the "text" (default search) field
+  -->
+  <updateRequestProcessorChain name="cloneFields">
+     <processor class="solr.CloneFieldUpdateProcessorFactory">
+       <lst name="source">
+         <str name="fieldRegex">.*</str>
+         <lst name="exclude">
+           <str name="fieldRegex">.*_binary$</str>
+         </lst>
+       </lst>
+       <str name="dest">text</str>
+     </processor>
+     <processor class="solr.LogUpdateProcessorFactory" />
+     <processor class="solr.RunUpdateProcessorFactory" />
+  </updateRequestProcessorChain>    
  
   <!-- Response Writers