From c3abb9e97ca7b13388dd536fe5430798aadb2ae5 Mon Sep 17 00:00:00 2001 From: Mike Taylor Date: Thu, 13 Apr 2006 14:53:17 +0000 Subject: [PATCH] Initial revision --- Changes | 10 + MANIFEST | 8 + MANIFEST.SKIP | 1 + META.yml | 12 ++ Makefile.PL | 16 ++ README | 34 ++++ t/Net-Z3950-IRSpy.t | 11 ++ zebra/README | 59 ++++++ zebra/filterconf.xml | 7 + zebra/pqf.properties | 150 +++++++++++++++ zebra/yazserver.xml | 21 ++ zebra/zebra.cfg | 23 +++ zebra/zeerex-2.0.xsd | 484 +++++++++++++++++++++++++++++++++++++++++++++++ zebra/zeerex.xml | 15 ++ zebra/zeerex2index.xsl | 50 +++++ zebra/zeerex2zeerex.xsl | 8 + 16 files changed, 909 insertions(+) create mode 100644 Changes create mode 100644 MANIFEST create mode 100644 MANIFEST.SKIP create mode 100644 META.yml create mode 100644 Makefile.PL create mode 100644 README create mode 100644 t/Net-Z3950-IRSpy.t create mode 100644 zebra/README create mode 100644 zebra/filterconf.xml create mode 100644 zebra/pqf.properties create mode 100644 zebra/yazserver.xml create mode 100644 zebra/zebra.cfg create mode 100644 zebra/zeerex-2.0.xsd create mode 100644 zebra/zeerex.xml create mode 100644 zebra/zeerex2index.xsl create mode 100644 zebra/zeerex2zeerex.xsl diff --git a/Changes b/Changes new file mode 100644 index 0000000..cfd79e2 --- /dev/null +++ b/Changes @@ -0,0 +1,10 @@ +$Id: Changes,v 1.1 2006-04-13 14:53:17 mike Exp $ + +Revision history for Perl extension Net::Z3950::IRSpy. 
+ +0.02 [IN PROGRESS] + +0.01 Wed Apr 5 15:53:47 2006 + - original version; created by h2xs 1.23 with options + -X --name=Net::Z3950::IRSpy --compat-version=5.8.0 --omit-constant --skip-exporter --skip-ppport + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..bffba75 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,8 @@ +Changes +Makefile.PL +MANIFEST +README +t/Net-Z3950-IRSpy.t +lib/Net/Z3950/IRSpy.pm +META.yml Module meta-data (added by MakeMaker) +MANIFEST.SKIP diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP new file mode 100644 index 0000000..f1d43c6 --- /dev/null +++ b/MANIFEST.SKIP @@ -0,0 +1 @@ +archive diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..a0938e6 --- /dev/null +++ b/META.yml @@ -0,0 +1,12 @@ +# $Id: META.yml,v 1.1 2006-04-13 14:53:17 mike Exp $ + +# http://module-build.sourceforge.net/META-spec.html +#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# +name: Net-Z3950-IRSpy +version: 0.01 +version_from: lib/Net/Z3950/IRSpy.pm +installdirs: site +requires: + +distribution_type: module +generated_by: ExtUtils::MakeMaker version 6.17 diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..dd79c1e --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,16 @@ +# $Id: Makefile.PL,v 1.1 2006-04-13 14:53:17 mike Exp $ + +use 5.008; +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. +WriteMakefile( + NAME => 'Net::Z3950::IRSpy', + VERSION_FROM => 'lib/Net/Z3950/IRSpy.pm', # finds $VERSION + PREREQ_PM => { + Net::Z3950::ZOOM => 1.04, + }, + ($] >= 5.005 ? 
## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'lib/Net/Z3950/IRSpy.pm', # retrieve abstract from module + AUTHOR => 'Mike Taylor ') : ()), +); diff --git a/README b/README new file mode 100644 index 0000000..6bb120b --- /dev/null +++ b/README @@ -0,0 +1,34 @@ +$Id: README,v 1.1 2006-04-13 14:53:17 mike Exp $ + +Net-Z3950-IRSpy +=============== + +This module exists to implement the IRspy program, which discovers, +analyses and monitors IR servers implementing the Z39.50 and SRU/W +protocols. It is a successor to the ZSpy program. + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires these other modules and libraries, in addition to +those specified in Makefile.PL: + + Zebra, release 1.4.0 or later: see http://indexdata.com/zebra/ + +COPYRIGHT AND LICENCE + +Copyright (C) 2006 by Index Data ApS. + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.8.7 or, +at your option, any later version of Perl 5 you may have available. + + diff --git a/t/Net-Z3950-IRSpy.t b/t/Net-Z3950-IRSpy.t new file mode 100644 index 0000000..dba53ae --- /dev/null +++ b/t/Net-Z3950-IRSpy.t @@ -0,0 +1,11 @@ +# $Id: Net-Z3950-IRSpy.t,v 1.1 2006-04-13 14:53:17 mike Exp $ + +# change 'tests => 1' to 'tests => last_test_to_print'; +use Test::More tests => 1; +BEGIN { use_ok('Net::Z3950::IRSpy') }; + +######################### + +# Insert your test code below, the Test::More module is use()ed here so read +# its man page ( perldoc Test::More ) for help writing this test script. 
+ diff --git a/zebra/README b/zebra/README new file mode 100644 index 0000000..411f04a --- /dev/null +++ b/zebra/README @@ -0,0 +1,59 @@ +$Id: README,v 1.1 2006-04-13 14:53:18 mike Exp $ + +What's what in this directory: + +README -- This file + +yazserver.xml -- The primary configuration file that controls how + Zebra runs (as a YAZ GFS application) in order to serve the + ZeeRex database. This specifies where to find this database's + own ZeeRex record (zeerex.xml), the configuration for + translating CQL queries into Z39.50 Type-1 (pqf.properties), + and the Zebra-specific configuration (zebra.cfg). + +zeerex.xml -- The static ZeeRex record for this database of ZeeRex + records. It describes what a ZeeRex database looks like, and + how it behaves. (Life would be better if Zebra could at least + partially work this out for itself, but for now we have to go + the long way round.) + +zeerex-2.0.xsd -- The XML Schema describing ZeeRex records, as + downloaded from the official ZeeRex site at: + http://explain.z3950.org/dtd/zeerex-2.0.xsd + This can be used to validate both our own static ZeeRex record + and the records created by IRSpy. + +pqf.properties -- The specification for how CQL queries are translated + into Z39.50 Type-1 queries. This file is identical to the one + supplied in the YAZ distribution: we'd just use that if there + was a reliable way to discover where it is. + +zebra.cfg -- Zebra-specific configuration, including the location of + the register files, the location of the XSLT filter + configuration (filterconf.xml), etc. + +filterconf.xml -- Configuration of Zebra's XSLT filter, which uses + XSLT stylesheets to identify the indexable data in incoming + files and to transform records for presentation. + +zeerex2index.xsl -- The indexing stylesheet for ZeeRex records. + +zeerex2zeerex.xsl -- The "no-op" stylesheet for presenting ZeeRex + records. + +db -- A subdirectory containing the actual database: register files, + dictionaries and suchlike. 
+ +-- + +To create the database and start the server: + +xmllint --noout --schema zeerex-2.0.xsd zeerex.xml # Verify +xsltproc zeerex2index.xsl zeerex.xml # Check what indexer will see +zebraidx -c zebra.cfg init # Remove any existing database records +zebraidx -c zebra.cfg update zeerex.xml # Our only record! +zebraidx -c zebra.cfg commit +zebrasrv -f yazserver.xml + +Then interrogate the database with SRU URLs such as: + http://localhost:1313/IR-Explain---1?version=1.1&operation=searchRetrieve&x-pquery=@attr%201=net:host%20localhost \ No newline at end of file diff --git a/zebra/filterconf.xml b/zebra/filterconf.xml new file mode 100644 index 0000000..ec24652 --- /dev/null +++ b/zebra/filterconf.xml @@ -0,0 +1,7 @@ + + + + + + diff --git a/zebra/pqf.properties b/zebra/pqf.properties new file mode 100644 index 0000000..1f5b7d4 --- /dev/null +++ b/zebra/pqf.properties @@ -0,0 +1,150 @@ +# $Id: pqf.properties,v 1.1 2006-04-13 14:53:18 mike Exp $ +# +# Properties file to drive org.z3950.zing.cql.CQLNode's toPQF() +# back-end and the YAZ CQL-to-PQF converter. This specifies the +# interpretation of various CQL indexes, relations, etc. in terms +# of Type-1 query attributes. +# +# This configuration file generates queries using BIB-1 attributes. +# See http://www.loc.gov/z3950/agency/zing/cql/dc-indexes.html +# for the Maintenance Agency's work-in-progress mapping of Dublin Core +# indexes to Attribute Architecture (util, XD and BIB-2) +# attributes. + +# Identifiers for prefixes used in this file. 
(index.*) +set.cql = info:srw/cql-context-set/1/cql-v1.1 +set.rec = info:srw/cql-context-set/2/rec-1.0 +set.dc = info:srw/cql-context-set/1/dc-v1.1 +set.bath = http://zing.z3950.org/cql/bath/2.0/ + +# default set (in query) +set = info:srw/cql-context-set/1/dc-v1.1 + +# The default access point and result-set references +index.cql.serverChoice = 1=1016 + # srw.serverChoice is deprecated in favour of cql.serverChoice + # BIB-1 "any" + +index.rec.id = 1=12 + +index.dc.title = 1=4 +index.dc.subject = 1=21 +index.dc.creator = 1=1003 +index.dc.author = 1=1003 + ### Unofficial synonym for "creator" +index.dc.editor = 1=1020 +index.dc.publisher = 1=1018 +index.dc.description = 1=62 + # "abstract" +index.dc.date = 1=30 +index.dc.resourceType = 1=1031 + # guesswork: "Material-type" +index.dc.format = 1=1034 + # guesswork: "Content-type" +index.dc.resourceIdentifier = 1=12 + # "Local number" +index.dc.source = 1=1019 + # "Record-source" +index.dc.language = 1=54 + # "Code--language" +index.dc.relation = 1=? + ### No idea how to represent this +index.dc.coverage = 1=? + ### No idea how to represent this +index.dc.rights = 1=? + ### No idea how to represent this + +# Relation attributes are selected according to the CQL relation by +# looking up the "relation." property: +# +relation.< = 2=1 +relation.le = 2=2 +relation.eq = 2=3 +relation.exact = 2=3 +relation.ge = 2=4 +relation.> = 2=5 +relation.<> = 2=6 + +### These two are not really right: +relation.all = 2=3 +relation.any = 2=3 + +# BIB-1 doesn't have a server choice relation, so we just make the +# choice here, and use equality (which is clearly correct). +relation.scr = 2=3 + +# Relation modifiers. 
+# +relationModifier.relevant = 2=102 +relationModifier.fuzzy = 5=102 + ### truncation=regExpr-2 (5=102) in Zebra is "fuzzy matching" +relationModifier.stem = 2=101 +relationModifier.phonetic = 2=100 + +# Position attributes may be specified for anchored terms (those +# beginning with "^", which is stripped) and unanchored (those not +# beginning with "^"). This may change when we get a BIB-1 truncation +# attribute that says "do what CQL does". +# +position.first = 3=1 6=1 + # "first in field" +position.any = 3=3 6=1 + # "any position in field" +position.last = 3=4 6=1 + # not a standard BIB-1 attribute +position.firstAndLast = 3=3 6=3 + # search term is anchored to be complete field + +# Structure attributes may be specified for individual relations; a +# default structure attribute may be specified by the pseudo-relation +# "*", to be used whenever a relation not listed here occurs. +# +structure.exact = 4=108 + # string +structure.all = 4=2 +structure.any = 4=2 +structure.* = 4=1 + # phrase + +# Truncation attributes used to implement CQL wildcard patterns. The +# simpler forms, left-, right- and both-truncation will be used for the +# simplest patterns, so that we produce PQF queries that conform more +# closely to the Bath Profile. However, when a more complex pattern +# such as "foo*bar" is used, we fall back on Z39.58-style masking. +# +truncation.right = 5=1 +truncation.left = 5=2 +truncation.both = 5=3 +truncation.none = 5=100 +truncation.z3958 = 5=104 + +# Finally, any additional attributes that should always be included +# with each term can be specified in the "always" property. +# +always = 6=1 +# 6=1: completeness = incomplete subfield + + +# Bath Profile support, added Thu Dec 18 13:06:20 GMT 2003 +# See the Bath Profile for SRW at +# http://zing.z3950.org/cql/bath.html +# including the Bath Context Set defined within that document. 
+# +# In this file, we only map index-names to BIB-1 use attributes, doing +# so in accordance with the specifications of the Z39.50 Bath Profile, +# and leaving the relations, wildcards, etc. to fend for themselves. + +index.bath.keyTitle = 1=33 +index.bath.possessingInstitution = 1=1044 +index.bath.name = 1=1002 +index.bath.personalName = 1=1 +index.bath.corporateName = 1=2 +index.bath.conferenceName = 1=3 +index.bath.uniformTitle = 1=6 +index.bath.isbn = 1=7 +index.bath.issn = 1=8 +index.bath.geographicName = 1=58 +index.bath.notes = 1=63 +index.bath.topicalSubject = 1=1079 +index.bath.genreForm = 1=1075 + diff --git a/zebra/yazserver.xml b/zebra/yazserver.xml new file mode 100644 index 0000000..274ca26 --- /dev/null +++ b/zebra/yazserver.xml @@ -0,0 +1,21 @@ + + + + tcp:@:1313 + + ./ + zebra.cfg + pqf.properties + + + + + localhost + 1313 + IR-Explain---1 + + + + + + diff --git a/zebra/zebra.cfg b/zebra/zebra.cfg new file mode 100644 index 0000000..17da22a --- /dev/null +++ b/zebra/zebra.cfg @@ -0,0 +1,23 @@ +# $Id: zebra.cfg,v 1.1 2006-04-13 14:53:17 mike Exp $ + +# Where to look for config files +#profilePath: /usr/local/share/idzebra/tab:/usr/share/idzebra/tab + +# Where to look for loadable zebra modules +#modulePath: zebra/lib/ + +# store records and record keys internally +storeData: 1 +storeKeys: 1 + +# Use the "xslt" filter with config file filter_alvis_conf.xml +recordtype: xslt.filterconf.xml + +database: IR-Explain---1 + +# where to put registers, and other var content, and how large they may be +register: db/register:100G +shadow: db/shadow:100G +lockdir: db/lock +tmpdir: db/tmp +keytmpdir: db/tmp diff --git a/zebra/zeerex-2.0.xsd b/zebra/zeerex-2.0.xsd new file mode 100644 index 0000000..0ae5fb1 --- /dev/null +++ b/zebra/zeerex-2.0.xsd @@ -0,0 +1,484 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/zebra/zeerex.xml b/zebra/zeerex.xml new file mode 100644 index 0000000..f16f0f9 --- /dev/null +++ b/zebra/zeerex.xml @@ -0,0 +1,15 @@ + + + + + localhost + 1313 + Default + + + IRSpy database of ZeeRex records + Mike Taylor, Index Data Aps, http://www.indexdata.com + mike@indexdata.com + + + diff --git a/zebra/zeerex2index.xsl b/zebra/zeerex2index.xsl new file mode 100644 index 0000000..8eab014 --- /dev/null +++ b/zebra/zeerex2index.xsl @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/zebra/zeerex2zeerex.xsl b/zebra/zeerex2zeerex.xsl new file mode 100644 index 0000000..cfbdde2 --- /dev/null +++ b/zebra/zeerex2zeerex.xsl @@ -0,0 +1,8 @@ + + + + + + + + -- 1.7.10.4