Remove unused ISOLatin1Accent mapping file. Update schema to avoid indexing enormous...
[lui-solr.git] / conf / schema.xml
index 399c040..00e92a5 100644 (file)
     </fieldType>
 
     <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> 
     </fieldType>
 
     <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> 
+
+    <!-- Field type for the catchall "text" field. The index-time chain
+         ends with a LengthFilterFactory (min="1" max="255") so that
+         enormous terms are not accidentally indexed. -->
+    <fieldType name="text_trunc"
+               class="solr.TextField"
+               positionIncrementGap="100">
+
+      <!-- Index-time chain: whitespace tokens, stop words removed, word
+           delimiter splitting with catenation, lower-casing, English
+           stemming, then the length limit as the final step. -->
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory" />
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+        <filter class="solr.WordDelimiterFilterFactory"
+                generateWordParts="1"
+                generateNumberParts="1"
+                catenateWords="1"
+                catenateNumbers="1"
+                catenateAll="0"
+                splitOnCaseChange="1" />
+        <filter class="solr.LowerCaseFilterFactory" />
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt" />
+        <filter class="solr.LengthFilterFactory" min="1" max="255" />
+      </analyzer>
+
+      <!-- Query-time chain: synonyms are expanded here only, catenation
+           is switched off, and no length limit is applied. -->
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory" />
+        <filter class="solr.SynonymFilterFactory"
+                synonyms="synonyms.txt"
+                ignoreCase="true"
+                expand="true" />
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+        <filter class="solr.WordDelimiterFilterFactory"
+                generateWordParts="1"
+                generateNumberParts="1"
+                catenateWords="0"
+                catenateNumbers="0"
+                catenateAll="0"
+                splitOnCaseChange="1" />
+        <filter class="solr.LowerCaseFilterFactory" />
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt" />
+      </analyzer>
+
+    </fieldType>
+
  </types>
 
  <fields>
  </types>
 
  <fields>
    <field name="harvest-date"      type="string" indexed="true" stored="true"/>
 
    <!-- catchall fields, can be implemented via copyField further on in this schema -->
    <field name="harvest-date"      type="string" indexed="true" stored="true"/>
 
    <!-- catchall fields, can be implemented via copyField further on in this schema -->
-   <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
+   <field name="text" type="text_trunc" indexed="true" stored="false" multiValued="true"/>
    <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
    <field name="payloads" type="payloads" indexed="true" stored="true"/>
 
    <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
    <field name="payloads" type="payloads" indexed="true" stored="true"/>