From: Mike Taylor Date: Thu, 13 Apr 2006 14:53:17 +0000 (+0000) Subject: Initial revision X-Git-Tag: INITIAL~1 X-Git-Url: http://git.indexdata.com/?p=irspy-moved-to-github.git;a=commitdiff_plain;h=2ee5bb696499e86b8e15501d19fdcc998e0d307e;ds=sidebyside Initial revision --- 2ee5bb696499e86b8e15501d19fdcc998e0d307e diff --git a/Changes b/Changes new file mode 100644 index 0000000..cfd79e2 --- /dev/null +++ b/Changes @@ -0,0 +1,10 @@ +$Id: Changes,v 1.1 2006-04-13 14:53:17 mike Exp $ + +Revision history for Perl extension Net::Z3950::IRSpy. + +0.02 [IN PROGRESS] + +0.01 Wed Apr 5 15:53:47 2006 + - original version; created by h2xs 1.23 with options + -X --name=Net::Z3950::IRSpy --compat-version=5.8.0 --omit-constant --skip-exporter --skip-ppport + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..bffba75 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,8 @@ +Changes +Makefile.PL +MANIFEST +README +t/Net-Z3950-IRSpy.t +lib/Net/Z3950/IRSpy.pm +META.yml Module meta-data (added by MakeMaker) +MANIFEST.SKIP diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP new file mode 100644 index 0000000..f1d43c6 --- /dev/null +++ b/MANIFEST.SKIP @@ -0,0 +1 @@ +archive diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..a0938e6 --- /dev/null +++ b/META.yml @@ -0,0 +1,12 @@ +# $Id: META.yml,v 1.1 2006-04-13 14:53:17 mike Exp $ + +# http://module-build.sourceforge.net/META-spec.html +#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# +name: Net-Z3950-IRSpy +version: 0.01 +version_from: lib/Net/Z3950/IRSpy.pm +installdirs: site +requires: + +distribution_type: module +generated_by: ExtUtils::MakeMaker version 6.17 diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..dd79c1e --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,16 @@ +# $Id: Makefile.PL,v 1.1 2006-04-13 14:53:17 mike Exp $ + +use 5.008; +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. 
+WriteMakefile( + NAME => 'Net::Z3950::IRSpy', + VERSION_FROM => 'lib/Net/Z3950/IRSpy.pm', # finds $VERSION + PREREQ_PM => { + Net::Z3950::ZOOM => 1.04, + }, + ($] >= 5.005 ? ## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'lib/Net/Z3950/IRSpy.pm', # retrieve abstract from module + AUTHOR => 'Mike Taylor ') : ()), +); diff --git a/README b/README new file mode 100644 index 0000000..6bb120b --- /dev/null +++ b/README @@ -0,0 +1,34 @@ +$Id: README,v 1.1 2006-04-13 14:53:17 mike Exp $ + +Net-Z3950-IRSpy +=============== + +This module exists to implement the IRspy program, which discovers, +analyses and monitors IR servers implementing the Z39.50 and SRU/W +protocols. It is a successor to the ZSpy program. + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires these other modules and libraries, in addition to +those specified in Makefile.PL: + + Zebra, release 1.4.0 or later: see http://indexdata.com/zebra/ + +COPYRIGHT AND LICENCE + +Copyright (C) 2006 by Index Data ApS. + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.8.7 or, +at your option, any later version of Perl 5 you may have available. 
+ + diff --git a/archive/2006 - 04 - 05 Target Monitor Software.doc b/archive/2006 - 04 - 05 Target Monitor Software.doc new file mode 100644 index 0000000..5329eb0 Binary files /dev/null and b/archive/2006 - 04 - 05 Target Monitor Software.doc differ diff --git a/archive/Net-Z3950-IRSpy-0.01.tar.gz b/archive/Net-Z3950-IRSpy-0.01.tar.gz new file mode 100644 index 0000000..ea927aa Binary files /dev/null and b/archive/Net-Z3950-IRSpy-0.01.tar.gz differ diff --git a/archive/press-release b/archive/press-release new file mode 100644 index 0000000..c2f6b45 --- /dev/null +++ b/archive/press-release @@ -0,0 +1,187 @@ +From mike Thu Apr 6 07:29:26 2006 +X-VM-v5-Data: ([nil nil nil nil t nil nil nil nil] + ["36361" "Wednesday" "5" "April" "2006" "16:03:54" "-0400" "David Dorman" "dorman@indexdata.com" "<7.0.1.0.2.20060405160339.03da1e80@indexdata.com>" "530" "[Staff] Support for NISO Metasearch Initiative" "^X-Spam-Status:" nil nil "4" nil nil nil nil nil nil nil nil nil] + nil) +Return-path: +X-Spam-Checker-Version: SpamAssassin 3.1.0 (2005-09-13) on bagel.indexdata.dk +X-Spam-Level: +Envelope-to: mike@indexdata.com +Delivery-date: Thu, 06 Apr 2006 08:09:00 +0200 +Received: from localhost.localdomain [127.0.0.1] + by localhost with POP3 (fetchmail-6.2.5) + for mike@localhost (single-drop); Thu, 06 Apr 2006 07:29:26 +0100 (BST) +Received: from kebab.indexdata.dk ([83.133.64.60]) + by bagel.indexdata.dk with esmtp (Exim 3.35 #1 (Debian)) + id 1FRNfv-0006yp-00; Thu, 06 Apr 2006 08:08:59 +0200 +Received: from localhost ([127.0.0.1] helo=kebab.indexdata.dk) + by kebab.indexdata.dk with esmtp (Exim 4.50) + id 1FRNcT-0002Nn-5P; Thu, 06 Apr 2006 08:05:25 +0200 +Received: from user.indexdata.dk ([213.150.43.10] helo=bagel.indexdata.dk) + by kebab.indexdata.dk with esmtp (Exim 4.50) id 1FREGw-0007if-6b + for staff@lists.indexdata.dk; Wed, 05 Apr 2006 22:06:40 +0200 +Received: from mailman by bagel.indexdata.dk with local (Exim 3.35 #1 (Debian)) + id 1FREKM-0007Hj-00 + for ; Wed, 05 
Apr 2006 22:10:06 +0200 +Received: from post1.wesleyan.edu ([129.133.6.131]) + by bagel.indexdata.dk with esmtp (Exim 3.35 #1 (Debian)) + id 1FREKL-0007HA-00 + for ; Wed, 05 Apr 2006 22:10:05 +0200 +Received: from DAVID.indexdata.com (69.177.235.51.adsl.snet.net + [69.177.235.51] (may be forged)) (authenticated bits=0) + by post1.wesleyan.edu (8.12.11/8.12.11) with ESMTP id k35K3wSC030632; + Wed, 5 Apr 2006 16:04:56 -0400 +Message-Id: <7.0.1.0.2.20060405160339.03da1e80@indexdata.com> +Message-Id: <7.0.1.0.2.20060405150259.03dbfe48@indexdata.com> +X-Mailer: QUALCOMM Windows Eudora Version 7.0.1.0 +Mime-Version: 1.0 +X-Wesleyan-MailScanner-Information: Please contact the ISP for more information +X-Wesleyan-MailScanner: Found to be clean +X-MailScanner-From: dorman@indexdata.com +X-Mailman-Approved-At: Thu, 06 Apr 2006 08:05:23 +0200 +X-BeenThere: staff@lists.indexdata.dk +X-Mailman-Version: 2.1.5 +Precedence: list +List-Id: Index Data's PRIVATE staff discussion list +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +Errors-To: staff-bounces@lists.indexdata.dk +X-SA-Exim-Connect-IP: 127.0.0.1 +X-SA-Exim-Mail-From: staff-bounces@lists.indexdata.dk +X-SA-Exim-Scanned: No (on kebab.indexdata.dk); SAEximRunCond expanded to false +X-Spam-Status: No, score=-2.4 required=5.0 tests=AWL,BAYES_00, + FORGED_RCVD_HELO autolearn=unavailable version=3.1.0 +From: David Dorman +Sender: staff-bounces@lists.indexdata.dk +To: "Index Data News List 3":; +Subject: [Staff] Support for NISO Metasearch Initiative +Date: Wed, 05 Apr 2006 16:03:54 -0400 +X-StripMime: Non-text section removed by stripmime +Content-Type: text/plain; charset="us-ascii"; format=flowed + +Hello to everyone on my Index Data news distribution list. I hope +this note finds you all engaged and feeling well. 
I am pleased to be +able to report, with the following announcement, that Index Data is +advancing the standardization of metasearching in a way that we are +confident will result in significant cost savings for the library community. + +David Dorman + +INDEX DATA DEVELOPMENT CONTRACT SUPPORTS NISO METASEARCH INITIATIVE + +April 5, 2006 - West Hartford, CT - Index Data, the premier developer +of Open Source information retrieval software for libraries and their +vendors, has just signed a contract with Helsinki University Library, +The National Library of Finland, to develop an information retrieval +target monitoring and analysis tool. This software, which is being +released under the name Keystone Target Manager, will be designed to +meet the specifications of the NISO draft standard Z39.92: +Information Retrieval Service Description, which is based on an +earlier community standard called ZeeRex. + +The Keystone Target Manager will perform the following services: + * Automatically monitor a collection of Z39.50/SRW/SRU targets, +maintaining statistics for availability over time. + * Provide configurable alerting services when targets become unavailable. + * Automatically gather information about the search capabilities +of targets that use standard search protocols, such as +Z39.50/SRW/SRU, for use in configuring IR clients of different types. + * Make the gathered information available in a software-readable +form, as a database of ZeeRex (ANSI/NISO Z39.92) records. (This will +enable sharing of this information via the Internet.) +Juha Hakala, Director of Information Technology at Helsinki +University Library and Chair of the NISO Committee which developed +the NISO Z39.92 draft standard, points out that "up to now +information about Z39.50 or SRU/SRW targets has usually been +discovered and documented manually, which is a laborious and time +consuming process. 
Also, there has been no efficient means of +exchanging this data among metasearch applications such as portals. +While NISO Z39.92, solves the latter problem, the Keystone Target +Manager will simplify the process of creating accurate target +descriptions and keeping them up-to-date." + +"Given the expertise Index Data has in implementing Z39.50 and +SRU/SRW applications," Hakala continued, "I am confident that the +service descriptions generated by the Target Manager will be useful. +It will be interesting to see how quickly the IR portal vendors and +maintenance agencies such as libraries will adopt this target +management tool, and start sharing on a global scale the target +descriptions harvested with it." + +Pat Stevens, the Interim Executive Director of NISO, was delighted to +learn about this agreement: "NISO thanks Helsinki University and +Index Data for their collaborative work on this project," she stated. +"It comes at just the right time as organizations are now looking for +an effective way to implement Z39.92." + +The Keystone Target Manager will be a major step forward in +standardizing and reducing the costs of maintaining target gateways +for metasearch services. Content providers that support standardized +search interfaces, and federated search services that take advantage +of this soon-to-be-released Open Source/Open Access target management +service, will together be able to reduce the cost of providing +federated search services to libraries. If these lower costs are +passed on to libraries in the form of reduced gateway maintenance +fees, libraries will be able to shift costs from access mechanisms to +licensing content. + +About Index Data + +Index Data has been developing standards-based information retrieval +technology since 1994. Library software vendors as varied as Ex +Libris, Fretwell-Downing, Geac, ISACSOFT, Koha and Polaris use Index +Data's technology in the software they provide to libraries. 
And +libraries around the world, including the Library of Congress and +many other national libraries, use the company's technology and +services to meet demanding and sophisticated information retrieval +challenges. The company's metasearch service, Keystone Retriever, is +among the most advanced in the industry and is distributed under an +Open Source license. + +-------------------------------------------------- +LIST MANAGEMENT INFO + +All recipients of this newsletter have been individually and manually +added to the Index Data News Distribution List. If you would like to +be deleted from this list, simply reply to this email with a remove +request and I will remove your name. If this email was forwarded to +you and you would like to receive these occasional mailings in the +future, let me know and I will add you to the list. +-------------------------------------------------- + +Yours for a More Open World, + +David + + +David Dorman +US Marketing Manager, Index Data +52 Whitman Ave. 
+West Hartford, Connecticut 06107 +dorman@indexdata.com +860-389-1568 or toll free 866-489-1568 +fax: 860-561-5613 + +INDEX DATA Means Business +for Open Source and Open Standards +- - - - - - - - - - - - - - - +www.indexdata.com + +_______________________________________________ +Staff mailing list +Staff@lists.indexdata.dk +http://lists.indexdata.dk/cgi-bin/mailman/listinfo/staff + + +--- StripMime Report -- processed MIME parts --- +multipart/mixed + text/plain (text body -- kept) + application/msword + text/plain (text body -- kept) +--- + + diff --git a/t/Net-Z3950-IRSpy.t b/t/Net-Z3950-IRSpy.t new file mode 100644 index 0000000..dba53ae --- /dev/null +++ b/t/Net-Z3950-IRSpy.t @@ -0,0 +1,11 @@ +# $Id: Net-Z3950-IRSpy.t,v 1.1 2006-04-13 14:53:17 mike Exp $ + +# change 'tests => 1' to 'tests => last_test_to_print'; +use Test::More tests => 1; +BEGIN { use_ok('Net::Z3950::IRSpy') }; + +######################### + +# Insert your test code below, the Test::More module is use()ed here so read +# its man page ( perldoc Test::More ) for help writing this test script. + diff --git a/zebra/README b/zebra/README new file mode 100644 index 0000000..411f04a --- /dev/null +++ b/zebra/README @@ -0,0 +1,59 @@ +$Id: README,v 1.1 2006-04-13 14:53:18 mike Exp $ + +What's what in this directory: + +README -- This file + +yazserver.xml -- The primary configuration file that controls how + Zebra runs (as a YAZ GFS application) in order to serve the + ZeeRex database. This specifies where to find this database's + own ZeeRex record (zeerex.xml), the configuration for + translating CQL queries into Z39.50 Type-1 (pqf.properties), + and the Zebra-specific configuration (zebra.cfg). + +zeerex.xml -- The static ZeeRex record for this database of ZeeRex + records. It describes what a ZeeRex database looks like, and + how it behaves. (Life would be better if Zebra could at least + partially work this out for itself, but for now we have to go + the long way round.) 
+ +zeerex-2.0.xsd -- The XML Schema describing ZeeRex records, as + downloaded from the official ZeeRex site at: + http://explain.z3950.org/dtd/zeerex-2.0.xsd + This can be used to validate both our own static ZeeRex record + and the records created by IRSpy. + +pqf.properties -- The specification for how CQL queries are translated + into Z39.50 Type-1 queries. This file is identical to the one + supplied in the YAZ distribution: we'd just use that if there + was a reliable way to discover where it is. + +zebra.cfg -- Zebra-specific configuration, including the location of + the register files, the location of the XSLT filter + configuration (filterconf.xml), etc. + +filterconf.xml -- Configuration of Zebra's XSLT filter, which uses + XSLT stylesheets to identify the indexable data in incoming + files and to transform records for presentation. + +zeerex2index.xsl -- The indexing stylesheet for ZeeRex records. + +zeerex2zeerex.xsl -- The "no-op" stylesheet for presenting ZeeRex + records. + +db -- A subdirectory containing the actual database: register files, + dictionaries and suchlike. + +-- + +To create the database and start the server: + +xmllint --noout --schema zeerex-2.0.xsd zeerex.xml # Verify +xsltproc zeerex2index.xsl zeerex.xml # Check what indexer will see +zebraidx -c zebra.cfg init # Remove any existing database records +zebraidx -c zebra.cfg update zeerex.xml # Our only record! 
+zebraidx -c zebra.cfg commit +zebrasrv -f yazserver.xml + +Then interrogate the database with SRU URLs such as: + http://localhost:1313/IR-Explain---1?version=1.1&operation=searchRetrieve&x-pquery=@attr%201=net:host%20localhost \ No newline at end of file diff --git a/zebra/filterconf.xml b/zebra/filterconf.xml new file mode 100644 index 0000000..ec24652 --- /dev/null +++ b/zebra/filterconf.xml @@ -0,0 +1,7 @@ + + + + + + diff --git a/zebra/pqf.properties b/zebra/pqf.properties new file mode 100644 index 0000000..1f5b7d4 --- /dev/null +++ b/zebra/pqf.properties @@ -0,0 +1,150 @@ +# $Id: pqf.properties,v 1.1 2006-04-13 14:53:18 mike Exp $ +# +# Properties file to drive org.z3950.zing.cql.CQLNode's toPQF() +# back-end and the YAZ CQL-to-PQF converter. This specifies the +# interpretation of various CQL indexes, relations, etc. in terms +# of Type-1 query attributes. +# +# This configuration file generates queries using BIB-1 attributes. +# See http://www.loc.gov/z3950/agency/zing/cql/dc-indexes.html +# for the Maintenance Agency's work-in-progress mapping of Dublin Core +# indexes to Attribute Architecture (util, XD and BIB-2) +# attributes. + +# Identifiers for prefixes used in this file. 
(index.*) +set.cql = info:srw/cql-context-set/1/cql-v1.1 +set.rec = info:srw/cql-context-set/2/rec-1.0 +set.dc = info:srw/cql-context-set/1/dc-v1.1 +set.bath = http://zing.z3950.org/cql/bath/2.0/ + +# default set (in query) +set = info:srw/cql-context-set/1/dc-v1.1 + +# The default access point and result-set references +index.cql.serverChoice = 1=1016 + # srw.serverChoice is deprecated in favour of cql.serverChoice + # BIB-1 "any" + +index.rec.id = 1=12 + +index.dc.title = 1=4 +index.dc.subject = 1=21 +index.dc.creator = 1=1003 +index.dc.author = 1=1003 + ### Unofficial synonym for "creator" +index.dc.editor = 1=1020 +index.dc.publisher = 1=1018 +index.dc.description = 1=62 + # "abstract" +index.dc.date = 1=30 +index.dc.resourceType = 1=1031 + # guesswork: "Material-type" +index.dc.format = 1=1034 + # guesswork: "Content-type" +index.dc.resourceIdentifier = 1=12 + # "Local number" +index.dc.source = 1=1019 + # "Record-source" +index.dc.language = 1=54 + # "Code--language" +index.dc.relation = 1=? + ### No idea how to represent this +index.dc.coverage = 1=? + ### No idea how to represent this +index.dc.rights = 1=? + ### No idea how to represent this + +# Relation attributes are selected according to the CQL relation by +# looking up the "relation." property: +# +relation.< = 2=1 +relation.le = 2=2 +relation.eq = 2=3 +relation.exact = 2=3 +relation.ge = 2=4 +relation.> = 2=5 +relation.<> = 2=6 + +### These two are not really right: +relation.all = 2=3 +relation.any = 2=3 + +# BIB-1 doesn't have a server choice relation, so we just make the +# choice here, and use equality (which is clearly correct). +relation.scr = 2=3 + +# Relation modifiers. 
+# +relationModifier.relevant = 2=102 +relationModifier.fuzzy = 5=102 + ### truncation=regExpr-2 (5=102) in Zebra is "fuzzy matching" +relationModifier.stem = 2=101 +relationModifier.phonetic = 2=100 + +# Position attributes may be specified for anchored terms (those +# beginning with "^", which is stripped) and unanchored (those not +# beginning with "^"). This may change when we get a BIB-1 truncation +# attribute that says "do what CQL does". +# +position.first = 3=1 6=1 + # "first in field" +position.any = 3=3 6=1 + # "any position in field" +position.last = 3=4 6=1 + # not a standard BIB-1 attribute +position.firstAndLast = 3=3 6=3 + # search term is anchored to be complete field + +# Structure attributes may be specified for individual relations; a +# default structure attribute may be specified by the pseudo-relation +# "*", to be used whenever a relation not listed here occurs. +# +structure.exact = 4=108 + # string +structure.all = 4=2 +structure.any = 4=2 +structure.* = 4=1 + # phrase + +# Truncation attributes used to implement CQL wildcard patterns. The +# simpler forms, left-, right- and both-truncation will be used for the +# simplest patterns, so that we produce PQF queries that conform more +# closely to the Bath Profile. However, when a more complex pattern +# such as "foo*bar" is used, we fall back on Z39.58-style masking. +# +truncation.right = 5=1 +truncation.left = 5=2 +truncation.both = 5=3 +truncation.none = 5=100 +truncation.z3958 = 5=104 + +# Finally, any additional attributes that should always be included +# with each term can be specified in the "always" property. +# +always = 6=1 +# 6=1: completeness = incomplete subfield + + +# Bath Profile support, added Thu Dec 18 13:06:20 GMT 2003 +# See the Bath Profile for SRW at +# http://zing.z3950.org/cql/bath.html +# including the Bath Context Set defined within that document. 
+# +# In this file, we only map index-names to BIB-1 use attributes, doing +# so in accordance with the specifications of the Z39.50 Bath Profile, +# and leaving the relations, wildcards, etc. to fend for themselves. + +index.bath.keyTitle = 1=33 +index.bath.possessingInstitution = 1=1044 +index.bath.name = 1=1002 +index.bath.personalName = 1=1 +index.bath.corporateName = 1=2 +index.bath.conferenceName = 1=3 +index.bath.uniformTitle = 1=6 +index.bath.isbn = 1=7 +index.bath.issn = 1=8 +index.bath.geographicName = 1=58 +index.bath.notes = 1=63 +index.bath.topicalSubject = 1=1079 +index.bath.genreForm = 1=1075 + diff --git a/zebra/yazserver.xml b/zebra/yazserver.xml new file mode 100644 index 0000000..274ca26 --- /dev/null +++ b/zebra/yazserver.xml @@ -0,0 +1,21 @@ + + + + tcp:@:1313 + + ./ + zebra.cfg + pqf.properties + + + + + localhost + 1313 + IR-Explain---1 + + + + + + diff --git a/zebra/zebra.cfg b/zebra/zebra.cfg new file mode 100644 index 0000000..17da22a --- /dev/null +++ b/zebra/zebra.cfg @@ -0,0 +1,23 @@ +# $Id: zebra.cfg,v 1.1 2006-04-13 14:53:17 mike Exp $ + +# Where to look for config files +#profilePath: /usr/local/share/idzebra/tab:/usr/share/idzebra/tab + +# Where to look for loadable zebra modules +#modulePath: zebra/lib/ + +# store records and record keys internally +storeData: 1 +storeKeys: 1 + +# Use the "xslt" filter with config file filterconf.xml +recordtype: xslt.filterconf.xml + +database: IR-Explain---1 + +# where to put registers, and other var content, and how large they may be +register: db/register:100G +shadow: db/shadow:100G +lockdir: db/lock +tmpdir: db/tmp +keytmpdir: db/tmp diff --git a/zebra/zeerex-2.0.xsd b/zebra/zeerex-2.0.xsd new file mode 100644 index 0000000..0ae5fb1 --- /dev/null +++ b/zebra/zeerex-2.0.xsd diff --git a/zebra/zeerex.xml b/zebra/zeerex.xml new file mode 100644 index 0000000..f16f0f9 --- /dev/null +++ b/zebra/zeerex.xml @@ -0,0 +1,15 @@ + + + + + localhost + 1313 + Default + + + IRSpy database of ZeeRex 
records + Mike Taylor, Index Data Aps, http://www.indexdata.com + mike@indexdata.com + + + diff --git a/zebra/zeerex2index.xsl b/zebra/zeerex2index.xsl new file mode 100644 index 0000000..8eab014 --- /dev/null +++ b/zebra/zeerex2index.xsl @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/zebra/zeerex2zeerex.xsl b/zebra/zeerex2zeerex.xsl new file mode 100644 index 0000000..cfbdde2 --- /dev/null +++ b/zebra/zeerex2zeerex.xsl @@ -0,0 +1,8 @@ + + + + + + + +