From 7b25277add2aae5caabee02213911aeeb65030c8 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 5 Sep 2006 12:01:31 +0000 Subject: [PATCH] Put man pages in reference section of main documentation . Avoid duplicated material for man pages. Put Zebra version in productnumber tag of man page. --- doc/.cvsignore | 1 + doc/Makefile.am | 39 +-- doc/administration.xml | 19 +- doc/entities.ent | 15 +- doc/field-structure.xml | 24 +- doc/idzebra-config-man.xml | 142 ---------- doc/idzebra-config.xml | 141 ++++++++++ doc/server.xml | 671 -------------------------------------------- doc/zebra.xml | 20 +- doc/zebraidx-commands.xml | 77 ----- doc/zebraidx-man.xml | 99 ------- doc/zebraidx-options.xml | 137 --------- doc/zebraidx.xml | 279 +++++++++++++++--- doc/zebrasrv-man.xml | 144 ---------- doc/zebrasrv-options.xml | 4 +- doc/zebrasrv-synopsis.xml | 4 +- doc/zebrasrv-virtual.xml | 22 +- doc/zebrasrv.xml | 509 +++++++++++++++++++++++++++++++++ 18 files changed, 958 insertions(+), 1389 deletions(-) delete mode 100644 doc/idzebra-config-man.xml create mode 100644 doc/idzebra-config.xml delete mode 100644 doc/server.xml delete mode 100644 doc/zebraidx-commands.xml delete mode 100644 doc/zebraidx-man.xml delete mode 100644 doc/zebraidx-options.xml delete mode 100644 doc/zebrasrv-man.xml create mode 100644 doc/zebrasrv.xml diff --git a/doc/.cvsignore b/doc/.cvsignore index 7ba19de..6687c9d 100644 --- a/doc/.cvsignore +++ b/doc/.cvsignore @@ -17,3 +17,4 @@ tkl.xsl *.php htmlhelp.hhp toc.hhc +manref.xml diff --git a/doc/Makefile.am b/doc/Makefile.am index 1e6dee9..e79db22 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,14 +1,15 @@ -## $Id: Makefile.am,v 1.59 2006-09-04 12:11:40 adam Exp $ +## $Id: Makefile.am,v 1.60 2006-09-05 12:01:31 adam Exp $ docdir=$(datadir)/doc/$(PACKAGE)$(PACKAGE_SUFFIX) SUBDIRS = common +XMLMAN = zebraidx.xml zebrasrv.xml idzebra-config.xml + XMLFILES = \ administration.xml \ architecture.xml \ examples.xml \ field-structure.xml \ - 
idzebra-config-man.xml \ indexdata.xml \ installation.xml \ introduction.xml \ @@ -18,16 +19,11 @@ XMLFILES = \ quickstart.xml \ recordmodel-alvisxslt.xml \ recordmodel-grs.xml \ - server.xml \ + manref.xml \ zebra.xml \ - zebraidx-commands.xml \ - zebraidx-man.xml \ - zebraidx-options.xml \ - zebraidx.xml \ - zebrasrv-man.xml \ zebrasrv-options.xml \ zebrasrv-synopsis.xml \ - zebrasrv-virtual.xml + zebrasrv-virtual.xml HTMLFILES = index.html @@ -38,26 +34,24 @@ MANFILES=zebraidx$(PACKAGE_SUFFIX).1 \ zebrasrv$(PACKAGE_SUFFIX).8 \ idzebra-config$(PACKAGE_SUFFIX).1 -REFFILES=zebraidx-man.xml zebrasrv-man.xml idzebra-config-man.xml - doc_DATA = $(HTMLFILES) $(PNGFILES) man_MANS = $(MANFILES) -EXTRA_DIST = $(XMLFILES) $(REFFILES) \ - $(doc_DATA) $(EPSFILES) $(man_MANS) $(REFFILES) \ +EXTRA_DIST = $(XMLFILES) $(XMLMAN) \ + $(doc_DATA) $(EPSFILES) $(man_MANS) \ marc_indexing.xml entities.ent local.ent.in -zebraidx$(PACKAGE_SUFFIX).1: zebraidx-man.xml zebraidx-options.xml zebraidx-commands.xml - $(MAN_COMPILE) $(srcdir)/zebraidx-man.xml +zebraidx$(PACKAGE_SUFFIX).1: zebraidx.xml + $(MAN_COMPILE) $(srcdir)/zebraidx.xml mv zebraidx.1 zebraidx$(PACKAGE_SUFFIX).1 -zebrasrv$(PACKAGE_SUFFIX).8: zebrasrv-man.xml zebrasrv-options.xml \ +zebrasrv$(PACKAGE_SUFFIX).8: zebrasrv.xml zebrasrv-options.xml \ zebrasrv-synopsis.xml zebrasrv-virtual.xml - $(MAN_COMPILE) $(srcdir)/zebrasrv-man.xml + $(MAN_COMPILE) $(srcdir)/zebrasrv.xml mv zebrasrv.8 zebrasrv$(PACKAGE_SUFFIX).8 -idzebra-config$(PACKAGE_SUFFIX).1: idzebra-config-man.xml - $(MAN_COMPILE) $(srcdir)/idzebra-config-man.xml +idzebra-config$(PACKAGE_SUFFIX).1: idzebra-config.xml + $(MAN_COMPILE) $(srcdir)/idzebra-config.xml mv idzebra-config.1 idzebra-config$(PACKAGE_SUFFIX).1 $(HTMLFILES): $(XMLFILES) @@ -77,6 +71,13 @@ zebra.pdf: $(XMLFILES) pdfjadetex zebra.tex >/dev/null pdfjadetex zebra.tex >/dev/null + +manref.xml: $(XMLMAN) $(srcdir)/common/ref2dbinc.xsl + rm -f manref.xml + for i in $(XMLMAN); do \ + xsltproc 
$(srcdir)/common/stripref.xsl $(srcdir)/$$i | sed 1d >> manref.xml; \ + done + gils.txt: gils.sgml sgml2txt -f gils.sgml diff --git a/doc/administration.xml b/doc/administration.xml index beba166..c4afea4 100644 --- a/doc/administration.xml +++ b/doc/administration.xml @@ -1,5 +1,5 @@ - + Administrating Zebra + @@ -7,25 +7,14 @@ - - - - + -'> -'> -'> -'> diff --git a/doc/field-structure.xml b/doc/field-structure.xml index c354795..758542b 100644 --- a/doc/field-structure.xml +++ b/doc/field-structure.xml @@ -1,5 +1,5 @@ - + Field Structure and Character Sets @@ -36,18 +36,18 @@ - index field type code + index field type code This directive introduces a new search index code. The argument is a one-character code to be used in the .abs files to select this particular index type. An index, roughly, corresponds to a particular structure attribute during search. Refer - to . + to . - sort field code type + sort field code type This directive introduces a @@ -59,11 +59,11 @@ - completeness boolean + completeness boolean This directive enables or disables complete field indexing. - The value of the boolean should be 0 + The value of the boolean should be 0 (disable) or 1. If completeness is enabled, the index entry will contain the complete contents of the field (up to a limit), with words (non-space characters) separated by single space characters @@ -77,7 +77,7 @@ - charmap filename + charmap filename This is the filename of the character @@ -98,7 +98,7 @@ - lowercase value-set + lowercase value-set This directive introduces the basic value set of the field type. 
@@ -151,7 +151,7 @@ - uppercase value-set + uppercase value-set This directive introduces the @@ -161,7 +161,7 @@ - space value-set + space value-set This directive introduces the character @@ -175,8 +175,8 @@ - map value-set - target + map value-set + target This directive introduces a mapping between each of the diff --git a/doc/idzebra-config-man.xml b/doc/idzebra-config-man.xml deleted file mode 100644 index 6be1056..0000000 --- a/doc/idzebra-config-man.xml +++ /dev/null @@ -1,142 +0,0 @@ - - %local; -]> - - - - - idzebra-config - 1 - - - - idzebra-config - Script to get information about idzebra - - - - - idzebra-config - - - - - - - - libraries - - - - DESCRIPTION - - idzebra-config is a script that returns information - that your own software should use to build software that uses idzebra. - - - - The following libraries are supported: - - - - None - - - - - OPTIONS - - - - --prefix[=DIR] - - Returns prefix of idzebra or assume a different one if DIR is - specified. - - - - - --version - - Returns version of idzebra. - - - - - --libs - - Library specification be used when linking with idzebra. - - - - - --lalibs - - Return library specification. - - - - - --cflags - - Return C Compiler flags. - - - - - --tab - - Return directory of idzebra tables. - - - - - --modules - - Return directory for Zebra modules. 
- - - - - - - FILES - - &prefix;/bin/idzebra-config-2.0 - - - &prefix;/lib/libidzebra*2.0.a - - - &prefix;/include/idzebra-2.0/idzebra/*.h - - - - - - diff --git a/doc/idzebra-config.xml b/doc/idzebra-config.xml new file mode 100644 index 0000000..efb9e78 --- /dev/null +++ b/doc/idzebra-config.xml @@ -0,0 +1,141 @@ + + %local; + + %entities; + + %common; +]> + + + + ZEBRA + &version; + + + + idzebra-config + 1 + + + + idzebra-config + Script to get information about idzebra + + + + + idzebra-config + + + + + + + + libraries + + + + DESCRIPTION + + idzebra-config is a script that returns information + that your own software should use to build software that uses idzebra. + + + + The following libraries are supported: + + + + None + + + + + OPTIONS + + + + --prefix[=DIR] + + Returns prefix of idzebra or assume a different one if DIR is + specified. + + + + + --version + + Returns version of idzebra. + + + + + --libs + + Library specification to be used when linking with idzebra. + + + + + --lalibs + + Return library specification. + + + + + --cflags + + Return C Compiler flags. + + + + + --tab + + Return directory of idzebra tables. + + + + + --modules + + Return directory for Zebra modules. + + + + + + + FILES + + &prefix;/bin/idzebra-config-2.0 + + + &prefix;/lib/libidzebra*2.0.a + + + &prefix;/include/idzebra-2.0/idzebra/*.h + + + + + + diff --git a/doc/server.xml b/doc/server.xml deleted file mode 100644 index 5a2f114..0000000 --- a/doc/server.xml +++ /dev/null @@ -1,671 +0,0 @@ - - - The Z39.50 Server - - - Running the Z39.50 Server (zebrasrv) - - - - - - - Description - Zebra is a high-performance, general-purpose structured text indexing - and retrieval engine. It reads structured records in a variety of input - formats (eg. email, XML, MARC) and allows access to them through exact - boolean search expressions and relevance-ranked free-text queries. - - - zebrasrv is the Z39.50 and SRW/U frontend - server for the Zebra indexer. 
- - - On Unix you can run the zebrasrv - server from the command line - and put it - in the background. It may also operate under the inet daemon. - On WIN32 you can run the server as a console application or - as a WIN32 Service. - - - - - Synopsis - &zebrasrv-synopsis; - - - - Options - - - The options for zebrasrv are the same - as those for YAZ' yaz-ztest. - Option -c specifies a Zebra configuration - file - if omitted zebra.cfg is read. - - - &zebrasrv-options; - - - Files - - zebra.cfg - - - See Also - - - zebraidx - 1 - , - - yaz-ztest - 8 - - - - The Zebra software is Copyright Index Data - http://www.indexdata.dk - and distributed under the - GPLv2 license. - - - - - - - - - Z39.50 Protocol Support and Behavior - - - Initialization - - - During initialization, the server will negotiate to version 3 of the - Z39.50 protocol, and the option bits for Search, Present, Scan, - NamedResultSets, and concurrentOperations will be set, if requested by - the client. The maximum PDU size is negotiated down to a maximum of - 1 MB by default. - - - - - - Search - - - - - The supported query type are 1 and 101. All operators are currently - supported with the restriction that only proximity units of type "word" - are supported for the proximity operator. - Queries can be arbitrarily complex. - Named result sets are supported, and result sets can be used as operands - without limitations. - Searches may span multiple databases. - - - - The server has full support for piggy-backed retrieval (see - also the following section). - - - - - - Present - - The present facility is supported in a standard fashion. The requested - record syntax is matched against the ones supported by the profile of - each record retrieved. If no record syntax is given, SUTRS is the - default. The requested element set name, again, is matched against any - provided by the relevant record profiles. 
- - - - Scan - - The attribute combinations provided with the termListAndStartPoint are - processed in the same way as operands in a query (see above). - Currently, only the term and the globalOccurrences are returned with - the termInfo structure. - - - - Sort - - - Z39.50 specifies three different types of sort criteria. - Of these Zebra supports the attribute specification type in which - case the use attribute specifies the "Sort register". - Sort registers are created for those fields that are of type "sort" in - the default.idx file. - The corresponding character mapping file in default.idx specifies the - ordinal of each character used in the actual sort. - - - - Z39.50 allows the client to specify sorting on one or more input - result sets and one output result set. - Zebra supports sorting on one result set only which may or may not - be the same as the output result set. - - - - Close - - If a Close PDU is received, the server will respond with a Close PDU - with reason=FINISHED, no matter which protocol version was negotiated - during initialization. If the protocol version is 3 or more, the - server will generate a Close PDU under certain circumstances, - including a session timeout (60 minutes by default), and certain kinds of - protocol errors. Once a Close PDU has been sent, the protocol - association is considered broken, and the transport connection will be - closed immediately upon receipt of further data, or following a short - timeout. - - - - - Explain - - Zebra maintains a "classic" - Explain database - on the side. - This database is called IR-Explain-1 and can be - searched using the attribute set exp-1. - - - The records in the explain database are of type - grs.sgml. - The root element for the Explain grs.sgml records is - explain, thus - explain.abs is used for indexing. - - - - Zebra must be able to locate - explain.abs in order to index the Explain - records properly. 
Zebra will work without it but the information - will not be searchable. - - - - - - - - - The SRU/SRW Server - - In addition to Z39.50, Zebra supports the more recent and - web-friendly IR protocol SRU, described at - . - SRU is ``Search/Retrieve via URL'', a simple, REST-like protocol - that uses HTTP GET to request search responses. The request - itself is made of parameters such as - query, - startRecord, - maximumRecords - and - recordSchema; - the response is an XML document containing hit-count, result-set - records, diagnostics, etc. SRU can be thought of as a re-casting - of Z39.50 semantics in web-friendly terms; or as a standardisation - of the ad-hoc query parameters used by search engines such as Google - and AltaVista; or as a superset of A9's OpenSearch (which it - predates). - - - Zebra further supports SRW, described at - . - SRW is the ``Search/Retrieve Web Service'', a SOAP-based alternative - implementation of the abstract protocol that SRU implements as HTTP - GET requests. In SRW, requests are encoded as XML documents which - are posted to the server. The responses are identical to those - returned by SRU servers, except that they are wrapped in a several - layers of SOAP envelope. - - - Zebra supports all three protocols - Z39.50, SRU and SRW - on the - same port, recognising what protocol is used by each incoming - requests and handling them accordingly. This is a achieved through - the use of Deep Magic; civilians are warned not to stand too close. - - - From here on, ``SRU'' is used to indicate both the SRU and SRW - protocols, as they are identical except for the transport used for - the protocol packets and Zebra's support for them is equivalent. - - - - Running the SRU Server (zebrasrv) - - Because Zebra supports all three protocols on one port, it would - seem to follow that the SRU server is run in the same way as - the Z39.50 server, as described above. 
This is true, but only in - an uninterestingly vacuous way: a Zebra server run in this manner - will indeed recognise and accept SRU requests; but since it - doesn't know how to handle the CQL queries that these protocols - use, all it can do is send failure responses. - - - - It is possible to cheat, by having SRU search Zebra with - a PQF query instead of CQL, using the - x-pquery - parameter instead of - query. - This is a - non-standard extension - of CQL, and a - very naughty - thing to do, but it does give you a way to see Zebra serving SRU - ``right out of the box''. If you start your favourite Zebra - server in the usual way, on port 9999, then you can send your web - browser to: - - - http://localhost:9999/Default?version=1.1 - &operation=searchRetrieve - &x-pquery=mineral - &startRecord=1 - &maximumRecords=1 - - - This will display the XML-formatted SRU response that includes the - first record in the result-set found by the query - mineral. (For clarity, the SRU URL is shown - here broken across lines, but the lines should be joined to gether - to make single-line URL for the browser to submit.) - - - - In order to turn on Zebra's support for CQL queries, it's necessary - to have the YAZ generic front-end (which Zebra uses) translate them - into the Z39.50 Type-1 query format that is used internally. And - to do this, the generic front-end's own configuration file must be - used. This file is described - elsewhere; - the salient point for SRU support is that - zebrasrv - must be started with the - -f frontendConfigFile - option rather than the - -c zebraConfigFile - option, - and that the front-end configuration file must include both a - reference to the Zebra configuration file and the CQL-to-PQF - translator configuration file. 
- - - A minimal front-end configuration file that does this would read as - follows: - - - - zebra.cfg - ../../tab/pqf.properties - - -]]> - - The - <config> - element contains the name of the Zebra configuration file that was - previously specified by the - -c - command-line argument, and the - <cql2rpn> - element contains the name of the CQL properties file specifying how - various CQL indexes, relations, etc. are translated into Type-1 - queries. - - - A zebra server running with such a configuration can then be - queried using proper, conformant SRU URLs with CQL queries: - - - http://localhost:9999/Default?version=1.1 - &operation=searchRetrieve - &query=title=utah and description=epicent* - &startRecord=1 - &maximumRecords=1 - - - - - SRU and SRW Protocol Support and Behavior - - Zebra running as an SRU server supports SRU version 1.1, including - CQL version 1.1. In particular, it provides support for the - following elements of the protocol. - - - - Search and Retrieval - - Zebra fully supports SRU's core - searchRetrieve - operation, as described at - - - - One of the great strengths of SRU is that it mandates a standard - query language, CQL, and that all conforming implementations can - therefore be trusted to correctly interpret the same queries. It - is with some shame, then, that we admit that Zebra also supports - an additional query language, our own Prefix Query Format (PQF, - ). - A PQF query is submitted by using the extension parameter - x-pquery, - in which case the - query - parameter must be omitted, which makes the request not valid SRU. - Please don't do this. - - - - - Scan - - Zebra supports SRU's - scan - operation, as described at - . - Scanning using CQL syntax is the default, where the - standard scanClause parameter is used. - - - In addition, a - mutant form of SRU scan is supported, using - the non-standard x-pScanClause parameter in - place of the standard scanClause to scan on a - PQF query clause. 
- - - - - Explain - - Zebra fully supports SRU's core - explain - operation, as described at - - - - The ZeeRex record explaining a database may be requested either - with a fully fledged SRU request (with - operation=explain - and version-number specified) - or with a simple HTTP GET at the server's basename. - The ZeeRex record returned in response is the one embedded - in the YAZ Frontend Server configuration file that is described in the - Virtual Hosts documentation. - - - Unfortunately, the data found in the - CQL-to-PQF text file must be added by hand-craft into the explain - section of the YAZ Frontend Server configuration file to be able - to provide a suitable explain record. - Too bad, but this is all extreme - new alpha stuff, and a lot of work has yet to be done .. - - - There is no linkeage whatsoever between the Z39.50 explain model - and the SRU/SRW explain response (well, at least not implemented - in Zebra, that is ..). Zebra does not provide a means using - Z39.50 to obtain the ZeeRex record. - - - - - Some SRU Examples - - Surf into http://localhost:9999 - to get an explain response, or use - - - - See number of hits for a query - - - - Fetch record 5-7 in Dublin Core format - - - - Even search using PQF queries using the extended naughty - verb x-pquery - - - - Or scan indexes using the extended extremely naughty - verb x-pScanClause - - Don't do this in production code! - But it's a great fast debugging aid. - - - - - Initialization, Present, Sort, Close - - In the Z39.50 protocol, Initialization, Present, Sort and Close - are separate operations. In SRU, however, these operations do not - exist. - - - - - SRU has no explicit initialization handshake phase, but - commences immediately with searching, scanning and explain - operations. - - - - - Neither does SRU have a close operation, since the protocol is - stateless and each request is self-contained. 
(It is true that - multiple SRU request/response pairs may be implemented as - multiple HTTP request/response pairs over a single persistent - TCP/IP connection; but the closure of that connection is not a - protocol-level operation.) - - - - - Retrieval in SRU is part of the - searchRetrieve operation, in which a search - is submitted and the response includes a subset of the records - in the result set. There is no direct analogue of Z39.50's - Present operation which requests records from an established - result set. In SRU, this is achieved by sending a subsequent - searchRetrieve request with the query - cql.resultSetId=id where - id is the identifier of the previously - generated result-set. - - - - - Sorting in CQL is done within the - searchRetrieve operation - in v1.1, by an - explicit sort parameter, but the forthcoming - v1.2 or v2.0 will most likely use an extension of the query - language, CQL for sorting: see - - - - - - It can be seen, then, that while Zebra operating as an SRU server - does not provide the same set of operations as when operating as a - Z39.50 server, it does provide equivalent functionality. - - - - - - diff --git a/doc/zebra.xml b/doc/zebra.xml index d68594c..b0de59c 100644 --- a/doc/zebra.xml +++ b/doc/zebra.xml @@ -4,12 +4,14 @@ [ %local; - + %entities; - + %common; + + ]> - + Zebra - User's Guide and Reference @@ -70,10 +72,14 @@ &chap-recordmodel-grs; &chap-recordmodel-alvisxslt; &chap-field-structure; - &chap-zebraidx; - &chap-server; - &app-license; - &app-indexdata; + + + Reference + &manref; + + + &app-license; + &app-indexdata; - - - update directory - - - Update the register with the files contained in - directory. - If no directory is provided, a list of files is read from - stdin. - See . - - - - - delete directory - - - Remove the records corresponding to the files found under - directory from the register. - - - - - commit - - - Write the changes resulting from the last update - commands to the register. 
This command is only available if the use of - shadow register files is enabled - (see ). - - - - - clean - - Clean shadow files and "forget" changes. - - - - create database - - Create database. - - - - drop database - - Drop database (delete database). - - - - init - - Deletes an entire register (all files in shadow+register areas). - - - - diff --git a/doc/zebraidx-man.xml b/doc/zebraidx-man.xml deleted file mode 100644 index f75cbd6..0000000 --- a/doc/zebraidx-man.xml +++ /dev/null @@ -1,99 +0,0 @@ - - - - Zebra - User's Guide - and Reference"> - - - Local Representation"> - Record Types"> - The Zebra Configuration File"> - Safe Updating - Using Shadow - Registers"> -]> - - - - - zebraidx - 1 - - - - zebraidx - Zebra Administrative Tool - - - - - zebraidx - - - - - - - - - - - - - command - file - - - - DESCRIPTION - - zebraidx allows you to insert, delete or updates - records in Zebra. zebraidx accepts a set options - (see below) and exactly one command (mandatory). - - - - COMMANDS - &zebraidx-commands; - - - OPTIONS - &zebraidx-options; - - FILES - - zebra.cfg - - - SEE ALSO - - - zebrasrv - 8 - - - - See "shadow registers" in Zebra manual - - - See "administration" in Zebra manual - - - - - diff --git a/doc/zebraidx-options.xml b/doc/zebraidx-options.xml deleted file mode 100644 index 6e686b1..0000000 --- a/doc/zebraidx-options.xml +++ /dev/null @@ -1,137 +0,0 @@ - - - - - -t type - - - Update all files as type. Currently, the - types supported are text and - grs.subtype. - If no subtype is provided for the GRS - (General Record Structure) type, the canonical input format - is assumed (see &ref-architecture-representation;). - Generally, it is probably advisable to specify the record types - in the zebra.cfg file (see - &ref-record-types;), to avoid confusion at - subsequent updates. - - - - - -c config-file - - - Read the configuration file - config-file instead of - zebra.cfg. 
- - - - - -g group - - - Update the files according to the group - settings for group - (see &ref-configuration-file;). - - - - - -d database - - - The records located should be associated with the database name - database for access through the Z39.50 server. - - - - - - -l file - - - Write log messages to file instead - of stderr. - - - - - - -m mbytes - - - Use mbytes of memory before flushing - keys to background storage. This setting affects performance when - updating large databases. - - - - - -L - - - Makes zebraidx skip symbolic links. By default, zebraidx follows - them. - - - - - -n - - - Disable the use of shadow registers for this operation - (see &ref-shadow-registers;). - - - - - -s - - - Show analysis of the indexing process. The maintenance - program works in a read-only mode and doesn't change the state - of the index. This options is very useful when you wish to test a - new profile. - - - - - -V - - - Show Zebra version. - - - - - -v level - - - Set the log level to level. - level should be one of - none, debug, and - all. - - - - - diff --git a/doc/zebraidx.xml b/doc/zebraidx.xml index 2432195..151f246 100644 --- a/doc/zebraidx.xml +++ b/doc/zebraidx.xml @@ -1,16 +1,31 @@ - - - Running the Maintenance Interface (zebraidx) - - - The following is a complete reference to the command line interface to - the zebraidx application. - + + %local; + + %entities; + + %common; +]> + + + + ZEBRA + &version; + - - Syntax - - + + zebraidx + 1 + + + + zebraidx + Zebra Administrative Tool + + + zebraidx @@ -28,31 +43,225 @@ command file - - + - - Commands - &zebraidx-commands; + DESCRIPTION + + zebraidx allows you to insert, delete or update + records in Zebra. zebraidx accepts a set of options + (see below) and exactly one command (mandatory). + + + COMMANDS + + + update directory + + + Update the register with the files contained in + directory. + If no directory is provided, a list of files is read from + stdin. + See Administration in the Zebra + Manual. 
+ + + + + delete directory + + + Remove the records corresponding to the files found under + directory from the register. + + + + + commit + + + Write the changes resulting from the last update + commands to the register. This command is only available if the use of + shadow register files is enabled + (see Shadow Registers in the + Zebra Manual). + + + + + clean + + Clean shadow files and "forget" changes. + + + + create database + + Create database. + + + + drop database + + Drop database (delete database). + + + + init + + Deletes an entire register (all files in shadow+register areas). + + + + + + OPTIONS + + + + -t type + + + Update all files as type. Currently, the + types supported are text, alvis + and grs.subtype. + Generally, it is probably advisable to specify the record types + in the zebra.cfg file (see + Record Types in the Zebra manual), + to avoid confusion at subsequent updates. + + + + + -c config-file + + + Read the configuration file + config-file instead of + zebra.cfg. + + + + + -g group + + + Update the files according to the group + settings for group + (see Zebra Configuration File in + the Zebra manual). + + + + + -d database + + + The records located should be associated with the database name + database for access through the Z39.50 server. + + + + + + -l file + + + Write log messages to file instead + of stderr. + + + + + + -m mbytes + + + Use mbytes of memory before flushing + keys to background storage. This setting affects performance when + updating large databases. + + + + + -L + + + Makes zebraidx skip symbolic links. By default, zebraidx follows + them. + + + + + -n + + + Disable the use of shadow registers for this operation + (see Shadow Registers in + the Zebra manual). + + + + + -s + + + Show analysis of the indexing process. The maintenance + program works in a read-only mode and doesn't change the state + of the index. This option is very useful when you wish to test a + new profile. + + + + + -V + + + Show Zebra version. 
+ + + + + -v level + + + Set the log level to level. + level should be one of + none, debug, and + all. + + + + + + FILES - Options: - &zebraidx-options; - + zebra.cfg + + + SEE ALSO + + + zebrasrv + 8 + + + + - - + diff --git a/doc/zebrasrv-man.xml b/doc/zebrasrv-man.xml deleted file mode 100644 index 07aa192..0000000 --- a/doc/zebrasrv-man.xml +++ /dev/null @@ -1,144 +0,0 @@ - - %local; - - %entities; - - %common; - - - - - -]> - - - - - - - - - zebrasrv - 8 - - - - zebrasrv - Zebra Server - - - - - &zebrasrv-synopsis; - - DESCRIPTION - Zebra is a high-performance, general-purpose structured text indexing - and retrieval engine. It reads structured records in a variety of input - formats (eg. email, XML, MARC) and allows access to them through exact - boolean search expressions and relevance-ranked free-text queries. - - - zebrasrv is the Z39.50 and SRW/U frontend - server for the Zebra indexer. - - - On Unix you can run the zebrasrv - server from the command line - and put it - in the background. It may also operate under the inet daemon. - On WIN32 you can run the server as a console application or - as a WIN32 Service. - - - - - OPTIONS - - - The options for zebrasrv are the same - as those for YAZ' yaz-ztest. - Option -c specifies a Zebra configuration - file - if omitted zebra.cfg is read. - - - &zebrasrv-options; - - VIRTUAL HOSTS - - zebrasrv uses the YAZ server - - &zebrasrv-virtual; - - FILES - - zebra.cfg - - - SEE ALSO - - - zebraidx - 1 - , - - yaz-ztest - 8 - - - - Section "The Z39.50 Server" in the Zebra manual. - http://www.indexdata.dk/zebra/doc/server.tkl - - - Section "Virtual Hosts" in the YAZ manual. - http://www.indexdata.dk/yaz/doc/server.vhosts.tkl - - - Section "Specification of CQL to RPN mappings" in the YAZ manual. - http://www.indexdata.dk/yaz/doc/tools.tkl#tools.cql.map - - - The Zebra software is Copyright Index Data - http://www.indexdata.dk - and distributed under the - GPLv2 license. 
- - - - - diff --git a/doc/zebrasrv-options.xml b/doc/zebrasrv-options.xml index dcda2b0..ac884c4 100644 --- a/doc/zebrasrv-options.xml +++ b/doc/zebrasrv-options.xml @@ -1,5 +1,5 @@ @@ -34,7 +34,7 @@ sgml-minimize-attributes:nil sgml-always-quote-attributes:t sgml-indent-step:1 sgml-indent-data:t -sgml-parent-document: "yaz.xml" +sgml-parent-document: "zebrasrv.xml" sgml-local-catalogs: nil sgml-namecase-general:t End: diff --git a/doc/zebrasrv-virtual.xml b/doc/zebrasrv-virtual.xml index 248ab14..cc428f1 100644 --- a/doc/zebrasrv-virtual.xml +++ b/doc/zebrasrv-virtual.xml @@ -1,5 +1,5 @@ @@ -12,9 +12,9 @@ A backend can be configured to execute in a particular working directory. Or the YAZ frontend may perform CQL to RPN conversion, thus - allowing traditional Z39.50 backends to be offered as a SRW/ SRU - service. SRW/ SRU Explain information for a particular backend may also - be specified. + allowing traditional Z39.50 backends to be offered as a +SRU service. + SRU Explain information for a particular backend may also be specified. For the HTTP protocol, the virtual host is specified in the Host header. @@ -138,15 +138,15 @@ element explain (optional) - Specifies SRW/ SRU ZeeRex content for this server. Copied verbatim - to the client. As things are now, some of the Explain content - seems redundant because host information, etc. is also stored - elsewhere. + Specifies SRU ZeeRex content for this + server - copied verbatim to the client. + As things are now, some of the Explain content seems redundant + because host information, etc. is also stored elsewhere. The format of the Explain record is described in detail, with - examples, on the file ZeeRex web-site, - . + examples, on the file at the + ZeeRex web-site. 
@@ -218,7 +218,7 @@ sgml-minimize-attributes:nil sgml-always-quote-attributes:t sgml-indent-step:1 sgml-indent-data:t -sgml-parent-document: "yaz.xml" +sgml-parent-document: "zebrasrv.xml" sgml-local-catalogs: nil sgml-namecase-general:t End: diff --git a/doc/zebrasrv.xml b/doc/zebrasrv.xml new file mode 100644 index 0000000..43c45c4 --- /dev/null +++ b/doc/zebrasrv.xml @@ -0,0 +1,509 @@ + + %local; + + %entities; + + %common; +]> + + + + ZEBRA + &version; + + + + zebrasrv + 8 + + + + zebrasrv + Zebra Server + + + + &zebrasrv-synopsis; + + DESCRIPTION + Zebra is a high-performance, general-purpose structured text indexing + and retrieval engine. It reads structured records in a variety of input + formats (eg. email, XML, MARC) and allows access to them through exact + boolean search expressions and relevance-ranked free-text queries. + + + zebrasrv is the Z39.50 and SRU frontend + server for the Zebra search engine and indexer. + + + On Unix you can run the zebrasrv + server from the command line - and put it + in the background. It may also operate under the inet daemon. + On WIN32 you can run the server as a console application or + as a WIN32 Service. + + + + OPTIONS + + + The options for zebrasrv are the same + as those for YAZ' yaz-ztest. + Option -c specifies a Zebra configuration + file - if omitted zebra.cfg is read. + + + &zebrasrv-options; + + + + Z39.50 Protocol Support and Behavior + + + Z39.50 Initialization + + + During initialization, the server will negotiate to version 3 of the + Z39.50 protocol, and the option bits for Search, Present, Scan, + NamedResultSets, and concurrentOperations will be set, if requested by + the client. The maximum PDU size is negotiated down to a maximum of + 1 MB by default. + + + + + + Z39.50 Search + + + The supported query types are 1 and 101. All operators are currently + supported with the restriction that only proximity units of type "word" + are supported for the proximity operator.
+ Queries can be arbitrarily complex. + Named result sets are supported, and result sets can be used as operands + without limitations. + Searches may span multiple databases. + + + + The server has full support for piggy-backed retrieval (see + also the following section). + + + + + + Z39.50 Present + + The present facility is supported in a standard fashion. The requested + record syntax is matched against the ones supported by the profile of + each record retrieved. If no record syntax is given, SUTRS is the + default. The requested element set name, again, is matched against any + provided by the relevant record profiles. + + + + Z39.50 Scan + + The attribute combinations provided with the termListAndStartPoint are + processed in the same way as operands in a query (see above). + Currently, only the term and the globalOccurrences are returned with + the termInfo structure. + + + + Z39.50 Sort + + + Z39.50 specifies three different types of sort criteria. + Of these Zebra supports the attribute specification type in which + case the use attribute specifies the "Sort register". + Sort registers are created for those fields that are of type "sort" in + the default.idx file. + The corresponding character mapping file in default.idx specifies the + ordinal of each character used in the actual sort. + + + + Z39.50 allows the client to specify sorting on one or more input + result sets and one output result set. + Zebra supports sorting on one result set only which may or may not + be the same as the output result set. + + + + Z39.50 Close + + If a Close PDU is received, the server will respond with a Close PDU + with reason=FINISHED, no matter which protocol version was negotiated + during initialization. If the protocol version is 3 or more, the + server will generate a Close PDU under certain circumstances, + including a session timeout (60 minutes by default), and certain kinds of + protocol errors. 
Once a Close PDU has been sent, the protocol + association is considered broken, and the transport connection will be + closed immediately upon receipt of further data, or following a short + timeout. + + + + + Z39.50 Explain + + Zebra maintains a "classic" + Z39.50 Explain database + on the side. + This database is called IR-Explain-1 and can be + searched using the attribute set exp-1. + + + The records in the explain database are of type + grs.sgml. + The root element for the Explain grs.sgml records is + explain, thus + explain.abs is used for indexing. + + + + Zebra must be able to locate + explain.abs in order to index the Explain + records properly. Zebra will work without it but the information + will not be searchable. + + + + + + The SRU Server + + In addition to Z39.50, Zebra supports the more recent and + web-friendly IR protocol SRU. + SRU can be carried over SOAP or a REST-like protocol + that uses HTTP GET or POST to request search responses. The request + itself is made of parameters such as + query, + startRecord, + maximumRecords + and + recordSchema; + the response is an XML document containing hit-count, result-set + records, diagnostics, etc. SRU can be thought of as a re-casting + of Z39.50 semantics in web-friendly terms; or as a standardisation + of the ad-hoc query parameters used by search engines such as Google + and AltaVista; or as a superset of A9's OpenSearch (which it + predates). + + + Zebra supports Z39.50, SRU GET, SRU POST, SRU SOAP (SRW) + - on the same port, recognising what protocol is used by each incoming + request and handling it accordingly. This is achieved through + the use of Deep Magic; civilians are warned not to stand too close. + + + Running zebrasrv as an SRU Server + + Because Zebra supports all protocols on one port, it would + seem to follow that the SRU server is run in the same way as + the Z39.50 server, as described above.
This is true, but only in + an uninterestingly vacuous way: a Zebra server run in this manner + will indeed recognise and accept SRU requests; but since it + doesn't know how to handle the CQL queries that these protocols + use, all it can do is send failure responses. + + + + It is possible to cheat, by having SRU search Zebra with + a PQF query instead of CQL, using the + x-pquery + parameter instead of + query. + This is a + non-standard extension + of CQL, and a + very naughty + thing to do, but it does give you a way to see Zebra serving SRU + ``right out of the box''. If you start your favourite Zebra + server in the usual way, on port 9999, then you can send your web + browser to: + + + http://localhost:9999/Default?version=1.1 + &operation=searchRetrieve + &x-pquery=mineral + &startRecord=1 + &maximumRecords=1 + + + This will display the XML-formatted SRU response that includes the + first record in the result-set found by the query + mineral. (For clarity, the SRU URL is shown + here broken across lines, but the lines should be joined together + to make a single-line URL for the browser to submit.) + + + + In order to turn on Zebra's support for CQL queries, it's necessary + to have the YAZ generic front-end (which Zebra uses) translate them + into the Z39.50 Type-1 query format that is used internally. And + to do this, the generic front-end's own configuration file must be + used. See ; + the salient point for SRU support is that + zebrasrv + must be started with the + -f frontendConfigFile + option rather than the + -c zebraConfigFile + option, + and that the front-end configuration file must include both a + reference to the Zebra configuration file and the CQL-to-PQF + translator configuration file.
+ + + A minimal front-end configuration file that does this would read as + follows: + + + + + zebra.cfg + ../../tab/pqf.properties + + +]]> + + The + <config> + element contains the name of the Zebra configuration file that was + previously specified by the + -c + command-line argument, and the + <cql2rpn> + element contains the name of the CQL properties file specifying how + various CQL indexes, relations, etc. are translated into Type-1 + queries. + + + A zebra server running with such a configuration can then be + queried using proper, conformant SRU URLs with CQL queries: + + + http://localhost:9999/Default?version=1.1 + &operation=searchRetrieve + &query=title=utah and description=epicent* + &startRecord=1 + &maximumRecords=1 + + + + + SRU Protocol Support and Behavior + + Zebra running as an SRU server supports SRU version 1.1, including + CQL version 1.1. In particular, it provides support for the + following elements of the protocol. + + + + SRU Search and Retrieval + + Zebra supports the + SRU searchRetrieve + operation. + + + One of the great strengths of SRU is that it mandates a standard + query language, CQL, and that all conforming implementations can + therefore be trusted to correctly interpret the same queries. It + is with some shame, then, that we admit that Zebra also supports + an additional query language, our own Prefix Query Format + (PQF). + A PQF query is submitted by using the extension parameter + x-pquery, + in which case the + query + parameter must be omitted, which makes the request not valid SRU. + Please don't do this. + + + + + SRU Scan + + Zebra supports SRU scan + operation. + Scanning using CQL syntax is the default, where the + standard scanClause parameter is used. + + + In addition, a + mutant form of SRU scan is supported, using + the non-standard x-pScanClause parameter in + place of the standard scanClause to scan on a + PQF query clause. + + + + + SRU Explain + + Zebra supports SRU explain. 
+ + + The ZeeRex record explaining a database may be requested either + with a fully fledged SRU request (with + operation=explain + and version-number specified) + or with a simple HTTP GET at the server's basename. + The ZeeRex record returned in response is the one embedded + in the YAZ Frontend Server configuration file that is described in the + . + + + Unfortunately, the data found in the + CQL-to-PQF text file must be added by hand to the explain + section of the YAZ Frontend Server configuration file to be able + to provide a suitable explain record. + Too bad, but this is all extreme + new alpha stuff, and a lot of work has yet to be done .. + + + There is no linkage whatsoever between the Z39.50 explain model + and the SRU explain response (well, at least not implemented + in Zebra, that is ..). Zebra does not provide a means using + Z39.50 to obtain the ZeeRex record. + + + + + Other SRU operations + + In the Z39.50 protocol, Initialization, Present, Sort and Close + are separate operations. In SRU, however, these operations do not + exist. + + + + + SRU has no explicit initialization handshake phase, but + commences immediately with searching, scanning and explain + operations. + + + + + Neither does SRU have a close operation, since the protocol is + stateless and each request is self-contained. (It is true that + multiple SRU request/response pairs may be implemented as + multiple HTTP request/response pairs over a single persistent + TCP/IP connection; but the closure of that connection is not a + protocol-level operation.) + + + + + Retrieval in SRU is part of the + searchRetrieve operation, in which a search + is submitted and the response includes a subset of the records + in the result set. There is no direct analogue of Z39.50's + Present operation which requests records from an established + result set.
In SRU, this is achieved by sending a subsequent + searchRetrieve request with the query + cql.resultSetId=id where + id is the identifier of the previously + generated result-set. + + + + + Sorting in CQL is done within the + searchRetrieve operation - in v1.1, by an + explicit sort parameter, but the forthcoming + v1.2 or v2.0 will most likely use an extension of the query + language, CQL sorting. + + + + + It can be seen, then, that while Zebra operating as an SRU server + does not provide the same set of operations as when operating as a + Z39.50 server, it does provide equivalent functionality. + + + + + + SRU Examples + + Surf into http://localhost:9999 + to get an explain response, or use + + + + See number of hits for a query + + + + Fetch record 5-7 in Dublin Core format + + + + Even search using PQF queries using the extended naughty + verb x-pquery + + + + Or scan indexes using the extended extremely naughty + verb x-pScanClause + + Don't do this in production code! + But it's a great fast debugging aid. + + + + + YAZ server virtual hosts + &zebrasrv-virtual; + + + SEE ALSO + + + zebraidx + 1 + + + + + + -- 1.7.10.4