From: dennis
Date: Thu, 5 Aug 2010 12:33:42 +0000 (+0200)
Subject: Tools for indexing LOC to SOLR server
X-Git-Tag: v1.4.4~10^2~1
X-Git-Url: http://git.indexdata.com/?a=commitdiff_plain;h=a839681fa3f873ffab21dc15390f1c9fad29c659;hp=e6b14f04ffa2aab036f27c1de8fd3531e8747fbc;p=pazpar2-moved-to-github.git

Tools for indexing LOC to SOLR server

post.jar: java client for POSTing
loc.sh: script to convert from gz'ed marc records to turbomarc, pazpar2 (internal) format
to SOLR document format, and then POSTing it to a SOLR server

Ignore temporary files.
---

diff --git a/solr/.gitignore b/solr/.gitignore
new file mode 100644
index 0000000..41134d6
--- /dev/null
+++ b/solr/.gitignore
@@ -0,0 +1,3 @@
+index.log
+part*.dat.gz.*
+data
\ No newline at end of file
diff --git a/solr/loc.sh b/solr/loc.sh
new file mode 100755
index 0000000..e561896
--- /dev/null
+++ b/solr/loc.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+#
+# Index LOC MARC dumps into a SOLR server.
+#
+# Each gz'ed MARC input file is converted to turbomarc, then to the
+# pazpar2 (internal) format, then to a SOLR document, which is POSTed
+# with post.jar.  Progress is appended to index.log.
+#
+# Usage: loc.sh [FILE...]
+#   With no arguments, every $LOCDATA/part* file is processed.
+#
+# Environment:
+#   SOLR_URL  update URL of the SOLR server
+#             (default: http://localhost:8983/solr/update)
+#   TWO_PASS  when set to 1, convert every file first and index afterwards
+
+LOG=index.log
+MARCDUMP="yaz-marcdump"
+
+if [ -z "$SOLR_URL" ] ; then
+    DEF_HOST=-Durl="http://localhost:8983/solr/update"
+else
+    DEF_HOST=-Durl="$SOLR_URL"
+fi
+
+# Prefer a local ./data directory over the shared default location.
+if [ -d "./data" ] ; then
+    LOCDATA="./data"
+else
+    LOCDATA=/extra/heikki/locdata
+fi
+
+if [ ! -d "$LOCDATA" ] ; then
+    echo "$LOCDATA not a directory"
+    exit 1
+fi
+
+# Keep the input list in an array so paths containing spaces stay intact.
+if [ -z "$1" ] ; then
+    FILES=("$LOCDATA"/part*)
+else
+    FILES=("$@")
+fi
+#echo "${FILES[@]}"
+
+rm -f "$LOG"
+
+# convert SOURCE OUTBASE
+#   Unpack the gz'ed MARC file SOURCE and write OUTBASE.mrc, OUTBASE.xml
+#   (turbomarc), OUTBASE.pz (pazpar2) and OUTBASE.solr (SOLR document)
+#   into the current directory, then list the results in $LOG.
+function convert()
+{
+    # Local on purpose: the indexing loop keeps its own FILE variable,
+    # which must not be overwritten by this function.
+    local src=$1
+    local out=$2
+    echo "zcat $src > $out.mrc"
+    zcat "$src" > "$out.mrc"
+    "$MARCDUMP" -f marc8 -t utf-8 -o turbomarc "$out.mrc" > "$out.xml"
+    xsltproc ../test/tmarc.xsl "$out.xml" > "$out.pz"
+    xsltproc ../etc/pz2-solr.xsl "$out.pz" > "$out.solr"
+    ls -l "$out".* >> "$LOG"
+}
+
+# Optional first pass: convert everything before any indexing starts.
+if [ "$TWO_PASS" == "1" ] ; then
+    for d in "${FILES[@]}" ; do
+        date "+%c converting $d" >> "$LOG"
+        # Name the output after the file being converted ($d), not $1.
+        convert "$d" "$(basename "$d")"
+    done
+fi
+
+for d in "${FILES[@]}" ; do
+    date "+%c converting $d" >> "$LOG"
+    BASE=$(basename "$d")
+    FILE=$BASE.solr
+    # Convert only when no SOLR document exists yet (e.g. after TWO_PASS).
+    if [ ! -f "$FILE" ] ; then
+        convert "$d" "$BASE"
+    fi
+    date "+%c indexing $d" >> "$LOG"
+    java "$DEF_HOST" -jar post.jar "$FILE"
+    date "+%c indexing $d ended" >> "$LOG"
+    #rm tmp.*
+done
+date "+%c All done" >> "$LOG"
+exit 0
diff --git a/solr/post.jar b/solr/post.jar
new file mode 100644
index 0000000..c1aff38
Binary files /dev/null and b/solr/post.jar differ