More work towards SolrCloud/Docker.
author Wayne Schneider <wayne@indexdata.com>
Fri, 12 Aug 2016 23:32:30 +0000 (18:32 -0500)
committer Wayne Schneider <wayne@indexdata.com>
Fri, 12 Aug 2016 23:32:30 +0000 (18:32 -0500)
docker/docker-deploy.yml
docker/roles/dev/tasks/main.yml
docker/solr/Dockerfile
docker/solr/lui-solr/admin-extra.html [new file with mode: 0644]
docker/solr/lui-solr/admin-extra.menu-bottom.html [new file with mode: 0644]
docker/solr/lui-solr/admin-extra.menu-top.html [new file with mode: 0644]
docker/solr/lui-solr/protwords.txt [new file with mode: 0644]
docker/solr/lui-solr/schema.xml [new file with mode: 0644]
docker/solr/lui-solr/solrconfig.xml [new file with mode: 0644]
docker/solr/lui-solr/stopwords.txt [new file with mode: 0644]
docker/solr/lui-solr/synonyms.txt [new file with mode: 0644]

index 628095a..ca0d4ab 100644 (file)
       become: yes
       service: name=docker enabled=yes state=started
 
-    - name: Check for luinet network
-      command: docker network ls -f name=luinet -q
-      register: luinet
-
-    - name: Create luinet network
-      command: docker network create luinet
-      when: luinet.stdout == ""
-
     - name: Launch ZooKeeper
+      become: yes
       docker_container: name=zk1 image=jplock/zookeeper
 
-    - name: Get networks for ZooKeeper
-      shell: "docker inspect --format={% raw %}'{{json .NetworkSettings.Networks}}'{% endraw %} zk1"
-      register: zk1_networks
+    - name: Get contents of /solr path in ZooKeeper
+      become: yes
+      command: docker exec -t zk1 bin/zkCli.sh get /solr
+      register: zk1_solr
 
-    - name: Attach ZooKeeper to luinet network
-      command: docker network connect luinet zk1
-      when: (zk1_networks.stdout|from_json).luinet is not defined
+    - name: Create /solr path in ZooKeeper
+      become: yes
+      command: docker exec -t zk1 bin/zkCli.sh create /solr []
+      when: zk1_solr.stdout.find('Node does not exist') != -1
 
-    - name: Detach ZooKeeper from bridge network
-      command: docker network disconnect bridge zk1
-      when: (zk1_networks.stdout|from_json).bridge is defined
+# Separate paths for setting up Solr nodes
 
 - hosts: dev
   roles:
   roles:
     - prod
 
-- hosts: all
+# Back to everyone
 
+- hosts: all
   tasks:
-    - name: Get networks for Solr
-      shell: "docker inspect --format={% raw %}'{{json .NetworkSettings.Networks}}'{% endraw %} solr1"
-      register: solr1_networks
-
-    - name: Attach Solr to luinet network
-      command: docker network connect luinet solr1
-      when: (solr1_networks.stdout|from_json).luinet is not defined
-
-    - name: Detach Solr from bridge network
-      command: docker network disconnect bridge solr1
-      when: (solr1_networks.stdout|from_json).bridge is defined
-
-    # - name: Create Solr container
-    #   docker_container:
-    #     name: solr1
-    #     tty: yes
-    #     published_ports: 8983:8983
-    #     #volumes: /vagrant/conf/solr/solr-home:/opt/solr/server/solr
-    #     command: bash -c '/opt/solr/bin/solr start -f -z zk1:2181'
+    - name: Launch Solr1
+      become: yes
+      docker_container:
+        name: solr1
+        image: lui-solr
+        tty: yes
+        published_ports: 8983:8983
+        links: zk1:ZK1
+        command: bash -c '/opt/solr/bin/solr start -f -z $ZK1_PORT_2181_TCP_ADDR:$ZK1_PORT_2181_TCP_PORT/solr'
+
+    - name: Launch Solr2
+      become: yes
+      docker_container:
+        name: solr2
+        image: lui-solr
+        tty: yes
+        published_ports: 8984:8983
+        links: zk1:ZK1
+        command: bash -c '/opt/solr/bin/solr start -f -z $ZK1_PORT_2181_TCP_ADDR:$ZK1_PORT_2181_TCP_PORT/solr'
+
+    ### HERE I AM ###
+    # wait for ZooKeeper to see all the solr nodes before proceeding
+    # look at Ansible's `until` task keyword (used with retries/delay)
+
+    - name: Check lui collection
+      become: yes
+      command: docker exec -t zk1 bin/zkCli.sh ls /solr/collections
+      register: lui_collection
+
+    - name: Create lui collection
+      become: yes
+      command: docker exec -t solr1 bin/solr create -c lui -d /opt/solr/lui-solr -shards 2
+      when: (lui_collection.stdout_lines|last).find('lui') == -1
+
index 80fff39..5fd2412 100644 (file)
@@ -1,13 +1,4 @@
 ---
 - name: Build the lui-solr image
+  become: yes
   docker_image: name=lui-solr path=/vagrant/docker/solr
-  
-- name: Create Solr container
-  docker_container:
-    name: solr1
-    image: lui-solr
-    tty: yes
-    published_ports: 8983:8983
-    #volumes: /vagrant/conf/solr/solr-home:/opt/solr/server/solr
-    command: bash -c '/opt/solr/bin/solr start -f -z zk1:2181'
-    state: present
index e289f80..f40a78b 100644 (file)
@@ -1,6 +1,4 @@
-# Create Solr data directory as expected by lui-solr solrconfig.xml
+# Copy lui-solr config
 FROM solr:6.1.0
 MAINTAINER Index Data <info@indexdata.com>
-USER root
-RUN mkdir -p /var/lib/masterkey/lui/solr/lui && chown -R solr:solr /var/lib/masterkey/lui/solr
-USER solr
\ No newline at end of file
+COPY lui-solr lui-solr
\ No newline at end of file
diff --git a/docker/solr/lui-solr/admin-extra.html b/docker/solr/lui-solr/admin-extra.html
new file mode 100644 (file)
index 0000000..fecab20
--- /dev/null
@@ -0,0 +1,24 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- The content of this page will be statically included into the top-
+right box of the cores overview page. Uncomment this as an example to 
+see where the content will show up.
+
+<img src="img/ico/construction.png"> This line will appear at the top-
+right box on collection1's Overview
+-->
diff --git a/docker/solr/lui-solr/admin-extra.menu-bottom.html b/docker/solr/lui-solr/admin-extra.menu-bottom.html
new file mode 100644 (file)
index 0000000..3359a46
--- /dev/null
@@ -0,0 +1,25 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- admin-extra.menu-bottom.html -->
+<!--
+<li>
+  <a href="#" style="background-image: url(img/ico/construction.png);">
+    LAST ITEM
+  </a>
+</li>
+-->
diff --git a/docker/solr/lui-solr/admin-extra.menu-top.html b/docker/solr/lui-solr/admin-extra.menu-top.html
new file mode 100644 (file)
index 0000000..0886cee
--- /dev/null
@@ -0,0 +1,25 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- admin-extra.menu-top.html -->
+<!--
+<li>
+  <a href="#" style="background-image: url(img/ico/construction.png);">
+    FIRST ITEM
+  </a>
+</li>
+-->
diff --git a/docker/solr/lui-solr/protwords.txt b/docker/solr/lui-solr/protwords.txt
new file mode 100644 (file)
index 0000000..1dfc0ab
--- /dev/null
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/docker/solr/lui-solr/schema.xml b/docker/solr/lui-solr/schema.xml
new file mode 100644 (file)
index 0000000..9253610
--- /dev/null
@@ -0,0 +1,271 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--  
+ This is the Solr schema file for the Index Data Local Unified
+ Index. This file should be named "schema.xml" and should be in the
+ conf directory for the solr core
+ (i.e. /etc/masterkey/lui/solr-config/schema.xml by default).
+ For customization, see https://wiki.apache.org/solr/SchemaXml and the
+ Schema Design section of the Solr Reference Guide.
+-->
+
+<schema name="Local Unified Index" version="1.6">
+
+    <!-- stock Solr field types from the dist schema.xml -->
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
+    <fieldtype name="binary" class="solr.BinaryField"/>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
+    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
+
+    <fieldType name="facet" class="solr.TextField" sortMissingLast="true" omitNorms="true">
+      <analyzer>
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory" />
+        <filter class="solr.TrimFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
+
+    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
+      <analyzer>
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory" />
+        <filter class="solr.TrimFilterFactory" />
+        <filter class="solr.PatternReplaceFilterFactory"
+                pattern="([^a-z])" replacement="" replace="all"
+        />
+      </analyzer>
+    </fieldType>
+    
+    <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
+      </analyzer>
+    </fieldtype>
+
+    <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
+      </analyzer>
+    </fieldtype>
+
+    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> 
+
+   <!-- defined fields in the lui-solr schema -->
+
+   <field name="id" type="string" indexed="true" stored="true" required="true" /> 
+   <field name="_version_" type="long" indexed="true" stored="true"/>
+
+   <field name="transactionId" type="long" indexed="true" stored="false"/>
+
+   <field name="author"       type="text"   indexed="true" stored="true"  multiValued="true" omitNorms="true"/>
+   <field name="author_exact" type="string" indexed="true" stored="false" multiValued="true" omitNorms="true" docValues="true" />
+   <field name="author-date"  type="text"   indexed="true" stored="true"  multiValued="true" omitNorms="true"/>
+   <field name="author-title" type="text"   indexed="true" stored="true"  multiValued="true" omitNorms="true"/>
+
+   <field name="corporate-date"     type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="corporate-location" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="corporate-name"     type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="callnumber" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="citation"   type="text" indexed="true" stored="true" multiValued="true"  omitNorms="true"/>
+
+   <field name="date"        type="text"    indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="description" type="text_ws" indexed="true" stored="true" multiValued="true"  omitNorms="true"/>
+
+   <field name="edition" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="electronic-format-instruction" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="electronic-format-type"        type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="electronic-note"               type="text" indexed="true" stored="true" multiValued="true"  omitNorms="true"/>
+   <field name="electronic-text"               type="text" indexed="true" stored="true" multiValued="true"  omitNorms="true"/>
+   <field name="electronic-url"                type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="isbn" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="issn" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="lccn" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="medium"       type="text"  indexed="true" stored="true"   multiValued="true" omitNorms="true"/>
+   <field name="medium_exact" type="string" indexed="true" stored="false" multiValued="true" omitNorms="true" docValues="true" />
+
+   <field name="meeting-date"     type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="meeting-location" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+   <field name="meeting-name"     type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="series-title" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="subject"       type="text_ws" indexed="true" stored="true"  multiValued="true" omitNorms="true" />
+   <field name="subject_exact" type="string"  indexed="true" stored="false" multiValued="true" omitNorms="true" docValues="true" />
+   <field name="subject-long"  type="text_ws" indexed="true" stored="true"  multiValued="true" omitNorms="true" />
+
+   <field name="system-control-nr" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
+
+   <field name="tech-rep-nr" type="text" indexed="true" multiValued="true" stored="true"/>
+
+   <field name="title"                  type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-complete"         type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-dates"            type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-medium"           type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-number-section"   type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-remainder"        type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-responsibility"   type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-uniform"          type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-uniform-key"      type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-uniform-media"    type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-uniform-partname" type="text"   indexed="true" multiValued="true" stored="true"/>
+   <field name="title-uniform-parts"    type="text"   indexed="true" multiValued="true" stored="true"/>
+
+   <field name="journal-title"          type="text" indexed="true" multiValued="true"  stored="true" />
+   <field name="journal-title_exact"    type="string" indexed="true" multiValued="true"  stored="false" docValues="true" />
+
+   <field name="physical-accomp"     type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-dimensions" type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-extent"     type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-format"     type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-specified"  type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-unitsize"   type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="physical-unittype"   type="text" indexed="true" stored="true" multiValued="true" />
+
+   <field name="publication-date"  type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="publication-name"  type="text" indexed="true" stored="true" multiValued="true" />
+   <field name="publication-place" type="text" indexed="true" stored="true" multiValued="true" />
+
+   <field name="harvest-timestamp" type="date"   indexed="true" stored="true"/>
+   <field name="harvest-date"      type="string" indexed="true" stored="true"/>
+
+   <!-- default search field, created by the clone-fields update
+        processor chain in solrconfig.xml -->
+   <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
+
+   <!-- Dynamic field definitions -->
+   <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
+   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
+   <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
+   <dynamicField name="*_t"  type="text"    indexed="true"  stored="true"/>
+   <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
+   <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
+   <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
+   <dynamicField name="*_dt" type="date"    indexed="true"  stored="true"/>
+   <dynamicField name="*_ti" type="tint"    indexed="true"  stored="true"/>
+   <dynamicField name="*_tl" type="tlong"   indexed="true"  stored="true"/>
+   <dynamicField name="*_tf" type="tfloat"  indexed="true"  stored="true"/>
+   <dynamicField name="*_td" type="tdouble" indexed="true"  stored="true"/>
+   <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="true"/>
+   <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
+   <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>
+   <dynamicField name="random_*" type="random" />
+   <dynamicField name="*_binary" type="binary" indexed="false" stored="true" multiValued="true"/>
+   <dynamicField name="*_path" type="text" indexed="true" stored="true" multiValued="true"/>
+
+   <!-- catchall dynamic field -->
+   <dynamicField name="*" type="text" multiValued="true"/>
+
+ <uniqueKey>id</uniqueKey>
+
+ <!-- copyField commands -->
+ <copyField source="author"  dest="author_exact"/>
+ <copyField source="subject" dest="subject_exact"/>
+ <copyField source="medium"  dest="medium_exact"/>
+ <copyField source="journal-title"   dest="journal-title_exact"/>
+
+</schema>
diff --git a/docker/solr/lui-solr/solrconfig.xml b/docker/solr/lui-solr/solrconfig.xml
new file mode 100644 (file)
index 0000000..89f91b9
--- /dev/null
@@ -0,0 +1,267 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--  
+     This is the Solr core configuration file for the Index Data Local
+     Unified Index.
+     
+     For more details about configurations options that may appear in
+     this file, see http://wiki.apache.org/solr/SolrConfigXml and the
+     "Configuring solrconfig.xml" section of the Solr Reference
+     Guide.
+-->
+<config>
+
+  <luceneMatchVersion>5.5.1</luceneMatchVersion>
+  <directoryFactory name="DirectoryFactory" 
+                    class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> 
+  <codecFactory class="solr.SchemaCodecFactory"/>
+
+  <!-- Use the ClassicIndexSchemaFactory, not Managed -->
+  <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+  <indexConfig>
+    <lockType>${solr.lock.type:native}</lockType>
+    <!-- logging controlled through log4j.properties -->
+     <infoStream>true</infoStream>
+  </indexConfig>
+
+
+  <!--
+       Enable JMX if and only if an existing MBeanServer
+       is found.
+  -->
+  <jmx />
+
+  <!-- The default high-performance update handler -->
+  <updateHandler class="solr.DirectUpdateHandler2">
+
+    <updateLog>
+      <str name="dir">${solr.ulog.dir:}</str>
+    </updateLog>
+
+    <autoCommit> 
+      <maxTime>${solr.autoCommit.maxTime:15000}</maxTime> 
+      <openSearcher>false</openSearcher> 
+    </autoCommit>
+    
+    <autoSoftCommit> 
+      <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime> 
+    </autoSoftCommit>
+
+
+  </updateHandler>
+  
+  <query>
+
+    <maxBooleanClauses>1024</maxBooleanClauses>
+
+    <filterCache class="solr.FastLRUCache"
+                 size="512"
+                 initialSize="512"
+                 autowarmCount="0"/>
+
+    <queryResultCache class="solr.LRUCache"
+                     size="512"
+                     initialSize="512"
+                     autowarmCount="0"/>
+   
+    <documentCache class="solr.LRUCache"
+                   size="512"
+                   initialSize="512"
+                   autowarmCount="0"/>
+    
+    <enableLazyFieldLoading>true</enableLazyFieldLoading>
+    <queryResultWindowSize>20</queryResultWindowSize>
+    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
+
+   <!-- Query Related Event Listeners: warm up the facet cache -->
+    <listener event="newSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <lst>
+         <str name="q">database:*</str>
+         <str name="facet">true</str>
+         <str name="facet.mincount">1</str>
+         <str name="facet.field">author_exact</str>
+         <str name="facet.field">subject_exact</str>
+         <str name="facet.field">medium_exact</str>
+         <str name="facet.field">date</str>
+         <str name="facet.field">database</str>
+        </lst>
+      </arr>
+    </listener>
+
+    <listener event="firstSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <lst>
+         <str name="q">database:*</str>
+         <str name="facet">true</str>
+         <str name="facet.mincount">1</str>
+         <str name="facet.field">author_exact</str>
+         <str name="facet.field">subject_exact</str>
+         <str name="facet.field">medium_exact</str>
+         <str name="facet.field">date</str>
+         <str name="facet.field">database</str>
+        </lst>
+      </arr>
+    </listener>
+
+    <useColdSearcher>false</useColdSearcher>
+    <maxWarmingSearchers>2</maxWarmingSearchers>
+
+  </query>
+
+  <requestDispatcher handleSelect="false" >
+
+    <requestParsers enableRemoteStreaming="true" 
+                    multipartUploadLimitInKB="2048000"
+                    formdataUploadLimitInKB="2048"
+                    addHttpRequestToContext="false"/>
+
+    <httpCaching never304="true" />
+    
+  </requestDispatcher>
+
+  <!-- Request Handlers -->
+
+  <requestHandler name="/select" class="solr.SearchHandler">
+     <lst name="defaults">
+       <str name="echoParams">explicit</str>
+       <int name="rows">10</int>
+       <str name="df">text</str>
+       <str name="fl">*,score</str>
+       <str name="q.op">AND</str>
+     </lst>
+  </requestHandler>
+
+  <requestHandler name="/query" class="solr.SearchHandler">
+     <lst name="defaults">
+       <str name="echoParams">explicit</str>
+       <str name="wt">json</str>
+       <str name="indent">true</str>
+       <str name="df">text</str>
+       <str name="fl">*,score</str>
+       <str name="q.op">AND</str>
+     </lst>
+  </requestHandler>
+
+  <requestHandler name="/get" class="solr.RealTimeGetHandler">
+     <lst name="defaults">
+       <str name="omitHeader">true</str>
+       <str name="wt">json</str>
+       <str name="indent">true</str>
+     </lst>
+  </requestHandler>
+
+  <requestHandler name="/export" class="solr.SearchHandler">
+    <lst name="invariants">
+      <str name="rq">{!xport}</str>
+      <str name="wt">xsort</str>
+      <str name="distrib">false</str>
+    </lst>
+
+    <arr name="components">
+      <str>query</str>
+    </arr>
+  </requestHandler>
+
+  <!-- Update Request Handlers -->
+  
+  <initParams path="/update/**">
+    <lst name="defaults">
+      <str name="update.chain">clone-fields</str>
+    </lst>
+  </initParams>
+
+  <requestHandler name="/update" class="solr.UpdateRequestHandler" />
+
+  <requestHandler name="/update/extract" 
+                  startup="lazy"
+                  class="solr.extraction.ExtractingRequestHandler" >
+    <lst name="defaults">
+      <str name="lowernames">true</str>
+      <str name="uprefix">ignored_</str>
+
+      <!-- capture link hrefs but ignore div attributes -->
+      <str name="captureAttr">true</str>
+      <str name="fmap.a">links</str>
+      <str name="fmap.div">ignored_</str>
+    </lst>
+  </requestHandler>
+
+  <!-- analysis handlers -->
+       
+  <requestHandler name="/analysis/field" 
+                  startup="lazy"
+                  class="solr.FieldAnalysisRequestHandler" />
+
+  <requestHandler name="/analysis/document" 
+                  class="solr.DocumentAnalysisRequestHandler" 
+                  startup="lazy" />
+
+  <!-- ping/healthcheck -->
+  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
+    <lst name="invariants">
+      <str name="q">solrpingquery</str>
+    </lst>
+    <lst name="defaults">
+      <str name="echoParams">all</str>
+    </lst>
+  </requestHandler>
+
+  <!-- Echo the request contents back to the client -->
+  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
+    <lst name="defaults">
+     <str name="echoParams">explicit</str> 
+     <str name="echoHandler">true</str>
+    </lst>
+  </requestHandler>
+  
+  <!-- Solr Replication -->
+  <requestHandler name="/replication" class="solr.ReplicationHandler" > 
+    <lst name="master">
+      <str name="replicateAfter">commit</str>
+      <str name="replicateAfter">startup</str>
+      <str name="confFiles">schema.xml,stopwords.txt</str>
+    </lst>
+    <!--
+       <lst name="slave">
+         <str name="masterUrl">http://your-master-hostname:8983/solr</str>
+         <str name="pollInterval">00:00:60</str>
+       </lst>
+    -->
+  </requestHandler>
+
+  <!-- Update Processors -->
+  <!-- 
+       Hook in a CloneFieldUpdateProcessor to copy all fields
+       except for binaries to the "text" (default search) field
+  -->
+  <updateRequestProcessorChain name="clone-fields">
+     <processor class="solr.CloneFieldUpdateProcessorFactory">
+       <lst name="source">
+         <str name="fieldRegex">.*</str>
+         <lst name="exclude">
+           <str name="fieldRegex">.*_binary$</str>
+         </lst>
+       </lst>
+       <str name="dest">text</str>
+     </processor>
+     <processor class="solr.LogUpdateProcessorFactory" />
+     <processor class="solr.RunUpdateProcessorFactory" />
+  </updateRequestProcessorChain>    
+  <!-- Response Writers -->
+
+  <queryResponseWriter name="json" class="solr.JSONResponseWriter">
+     <!-- For debugging purposes, JSON responses are written as
+      plain text so that they are easy to read in *any* browser.
+      If you expect a MIME type of "application/json" just remove this override.
+     -->
+    <str name="content-type">text/plain; charset=UTF-8</str>
+  </queryResponseWriter>
+  
+  <!-- Legacy config for the admin interface -->
+  <admin>
+    <defaultQuery>*:*</defaultQuery>
+  </admin>
+
+</config>
diff --git a/docker/solr/lui-solr/stopwords.txt b/docker/solr/lui-solr/stopwords.txt
new file mode 100644 (file)
index 0000000..b5824da
--- /dev/null
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# A couple of dummy stopwords used to verify that the stopwords are really
+# being loaded from this file:
+stopworda
+stopwordb
+
+# Standard English stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+s
+such
+t
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
+
diff --git a/docker/solr/lui-solr/synonyms.txt b/docker/solr/lui-solr/synonyms.txt
new file mode 100644 (file)
index 0000000..b0e31cb
--- /dev/null
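@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0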
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Some test synonym mappings unlikely to appear in real input text
+aaa => aaaa
+bbb => bbbb1 bbbb2
+ccc => cccc1,cccc2
+a\=>a => b\=>b
+a\,a => b\,b
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+# Note: the GB group above uses "gib" instead of "GiB" so that any
+# WordDelimiterFilter applied after this filter won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+