X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=doc%2Ftools.xml;h=1d79005aa4e9b89fd29175ca96f406b31d5c0fa3;hp=accbb5959a0de907dd38cd964a85314928cc82b4;hb=95d8bd04e10519a635972a24176270ef4dbe8d2c;hpb=d1ad544488a515cb6ad80d1b98863baa57abb567 diff --git a/doc/tools.xml b/doc/tools.xml index accbb59..1d79005 100644 --- a/doc/tools.xml +++ b/doc/tools.xml @@ -1,4 +1,4 @@ - + Supporting Tools @@ -225,7 +225,7 @@ @and. Its semantics are described in section 3.7.2 (Proximity) of Z39.50 the standard itself, which can be read on-line at - + In PQF, the proximity operation is represented by a sequence @@ -294,108 +294,126 @@ (The numeric values of the relation and well-known unit-code parameters are taken straight from - the ASN.1 of the proximity structure in the standard.) PQF queries - Queries using simple terms. - - dylan - "bob dylan" - - - Boolean operators. - - @or "dylan" "zimmerman" - @and @or dylan zimmerman when - @and when @or dylan zimmerman - - - - Reference to result sets. - - @set Result-1 - @and @set seta setb - - - - Attributes for terms. - - @attr 1=4 computer - @attr 1=4 @attr 4=1 "self portrait" - @attrset exp1 @attr 1=1 CategoryList - @attr gils 1=2008 Copenhagen - @attr 1=/book/title computer - - - - Proximity. - - @prox 0 3 1 2 k 2 dylan zimmerman - - - Here the parameters 0, 3, 1, 2, k and 2 represent exclusion, - distance, ordered, relation, which-code and unit-code, in that - order. So: - - - exclusion = 0: the proximity condition must hold - - - distance = 3: the terms must be three units apart - - - ordered = 1: they must occur in the order they are specified - - - relation = 2: lessThanOrEqual (to the distance of 3 units) - - - which-code is ``known'', so the standard unit-codes are used - - - unit-code = 2: word. - - - So the whole proximity query means that the words - dylan and zimmerman must - both occur in the record, in that order, differing in position - by three or fewer words (i.e. 
with two or fewer words between - them.) The query would find ``Bob Dylan, aka. Robert - Zimmerman'', but not ``Bob Dylan, born as Robert Zimmerman'' - since the distance in this case is four. - - - - Specifying term type. - - @term string "a UTF-8 string, maybe?" - - - Mixed queries - - @or @and bob dylan @set Result-1 - - @attr 4=1 @and @attr 1=1 "bob dylan" @attr 1=4 "slow train coming" - - @and @attr 2=4 @attr gils 1=2038 -114 @attr 2=2 @attr gils 1=2039 -109 + PQF queries using simple terms + + + dylan + + "bob dylan" + + + + PQF boolean operators + + + @or "dylan" "zimmerman" + + @and @or dylan zimmerman when + + @and when @or dylan zimmerman + + + + PQF references to result sets + + + @set Result-1 + + @and @set seta @set setb + + + + Attributes for terms + + + @attr 1=4 computer + + @attr 1=4 @attr 4=1 "self portrait" + + @attrset exp1 @attr 1=1 CategoryList + + @attr gils 1=2008 Copenhagen + + @attr 1=/book/title computer + + + + PQF Proximity queries + + + @prox 0 3 1 2 k 2 dylan zimmerman + + + Here the parameters 0, 3, 1, 2, k and 2 represent exclusion, + distance, ordered, relation, which-code and unit-code, in that + order. So: + + + exclusion = 0: the proximity condition must hold + + + distance = 3: the terms must be three units apart + + + ordered = 1: they must occur in the order they are specified + + + relation = 2: lessThanOrEqual (to the distance of 3 units) + + + which-code is ``known'', so the standard unit-codes are used + + + unit-code = 2: word. + + + So the whole proximity query means that the words + dylan and zimmerman must + both occur in the record, in that order, differing in position + by three or fewer words (i.e. with two or fewer words between + them.) The query would find ``Bob Dylan, aka. Robert + Zimmerman'', but not ``Bob Dylan, born as Robert Zimmerman'' + since the distance in this case is four. + + + + PQF specification of search term + + + @term string "a UTF-8 string, maybe?" 
+ + + + PQF mixed queries + + + @or @and bob dylan @set Result-1 + + @attr 4=1 @and @attr 1=1 "bob dylan" @attr 1=4 "slow train coming" + + @and @attr 2=4 @attr gils 1=2038 -114 @attr 2=2 @attr gils 1=2039 -109 - + - The last of these examples is a spatial search: in - the GILS attribute set, - access point - 2038 indicates West Bounding Coordinate and - 2030 indicates East Bounding Coordinate, - so the query is for areas extending from -114 degrees - to no more than -109 degrees. + access point + 2038 indicates West Bounding Coordinate and + 2039 indicates East Bounding Coordinate, + so the query is for areas extending from -114 degrees + to no more than -109 degrees. - - + + + CCL @@ -411,8 +429,7 @@ - The EUROPAGATE - research project working under the Libraries programme + The EUROPAGATE research project working under the Libraries programme of the European Commission's DG XIII has, amongst other useful tools, implemented a general-purpose CCL parser which produces an output structure that can be trivially converted to the internal RPN @@ -566,7 +583,7 @@ u=value - Use attribute. Common use attributes are + Use attribute (1). Common use attributes are 1 Personal-name, 4 Title, 7 ISBN, 8 ISSN, 30 Date, 62 Subject, 1003 Author), 1016 Any. Specify value as an integer. @@ -576,7 +593,7 @@ r=value - Relation attribute. Common values are + Relation attribute (2). Common values are 1 <, 2 <=, 3 =, 4 >=, 5 >, 6 <>, 100 phonetic, 101 stem, 102 relevance, 103 always matches. @@ -585,7 +602,7 @@ p=value - Position attribute. Values: 1 first in field, 2 + Position attribute (3). Values: 1 first in field, 2 first in any subfield, 3 any position in field. @@ -593,7 +610,7 @@ s=value - Structure attribute. Values: 1 phrase, 2 word, + Structure attribute (4). 
Values: 1 phrase, 2 word, 3 key, 4 year, 5 date, 6 word list, 100 date (un), 101 name (norm), 102 name (un), 103 structure, 104 urx, 105 free-form-text, 106 document-text, 107 local-number, @@ -604,7 +621,7 @@ t=value - Truncation attribute. Values: 1 right, 2 left, + Truncation attribute (5). Values: 1 right, 2 left, 3 left& right, 100 none, 101 process #, 102 regular-1, 103 regular-2, 104 CCL. @@ -613,7 +630,7 @@ c=value - Completeness attribute. Values: 1 incomplete subfield, + Completeness attribute (6). Values: 1 incomplete subfield, 2 complete subfield, 3 complete field. @@ -623,10 +640,8 @@ - The complete list of Bib-1 attributes can be found - - here - . + Refer to the complete + list of Bib-1 attributes It is also possible to specify non-numeric attribute values, @@ -664,8 +679,27 @@ r=o - Allows operators greather-than, less-than, ... equals and - sets relation attribute accordingly (relation ordered). + Allows ranges and the operators greather-than, less-than, ... + equals. + This sets Bib-1 relation attribute accordingly (relation + ordered). A query construct is only treated as a range if + dash is used and that is surrounded by white-space. So + -1980 is treated as term + "-1980" not <= 1980. + If - 1980 is used, however, that is + treated as a range. + + + + r=r + Similar to r=o but assumes that terms + are non-negative (not prefixed with -). + Thus, a dash will always be treated as a range. + The construct 1980-1990 is + treated as a range with r=r but as a + single term "1980-1990" with + r=o. The special attribute + r=r is available in YAZ 2.0.24 or later. @@ -713,11 +747,6 @@ date u=30 r=o - Four qualifiers are defined - ti, - au, ranked and - date. - - ti and au both set structure attribute to phrase (s=1). ti @@ -740,9 +769,9 @@ Query - year > 1980 + date > 1980 - is a valid query, while + is a valid query. But ti > 1980 @@ -763,7 +792,7 @@ be an alias for q1, q2... such that the CCL query q=x is equivalent to - q1=x or w2=x or .... 
+ q1=x or q2=x or .... @@ -909,10 +938,9 @@ struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, CQL - CQL + CQL - Common Query Language - was defined for the - SRW - protocol. + SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol @@ -921,8 +949,7 @@ struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, If you are new to CQL, read the - Gentle - Introduction. + Gentle Introduction. @@ -949,8 +976,7 @@ struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, The parser converts CQL to - - XCQL. + XCQL. XCQL is an XML representation of CQL. XCQL is part of the SRW specification. However, since SRU supports CQL only, we don't expect XCQL to be widely used. @@ -1026,34 +1052,28 @@ struct cql_node *cql_parser_result(CQL_parser cp); #define CQL_NODE_ST 1 #define CQL_NODE_BOOL 2 -#define CQL_NODE_MOD 3 struct cql_node { int which; union { struct { char *index; + char *index_uri; char *term; char *relation; + char *relation_uri; struct cql_node *modifiers; - struct cql_node *prefixes; } st; struct { char *value; struct cql_node *left; struct cql_node *right; struct cql_node *modifiers; - struct cql_node *prefixes; } boolean; - struct { - char *name; - char *value; - struct cql_node *next; - } mod; } u; }; - There are three kinds of nodes, search term (ST), boolean (BOOL), - and modifier (MOD). + There are two node types: search term (ST) and boolean (BOOL). + A modifier is treated as a search term too. The search term node has five members: @@ -1067,6 +1087,12 @@ struct cql_node { + index_uri: index URi for search term + or NULL if none could be resolved for the index. + + + + term: the search term itself. @@ -1077,18 +1103,14 @@ struct cql_node { - modifiers: relation modifiers for search - term. The modifiers is a simple linked - list (NULL for last entry). Each relation modifier node - is of type MOD. 
+ relation_uri: relation URI for search term. - prefixes: index prefixes for search - term. The prefixes is a simple linked - list (NULL for last entry). Each prefix node - is of type MOD. + modifiers: relation modifiers for search + term. The modifiers list itself of cql_nodes + each of type ST. @@ -1110,37 +1132,6 @@ struct cql_node { modifiers: proximity arguments. - - - prefixes: index prefixes. - The prefixes is a simple linked - list (NULL for last entry). Each prefix node - is of type MOD. - - - - - - - The modifier node is a "utility" node used for name-value pairs, - such as prefixes, proximity arguements, etc. - - - - name name of mod node. - - - - - value value of mod node. - - - - - next: pointer to next node which is - always a mod node (NULL for last entry). - - @@ -1188,7 +1179,7 @@ int cql_transform_buf(cql_transform_t ct, returns a non-zero SRW error code; otherwise zero is returned (conversion successful). The meanings of the numeric error codes are listed in the SRW specifications at - + If conversion fails, more information can be obtained by calling @@ -1199,9 +1190,9 @@ int cql_transform_error(cql_transform_t ct, char **addinfop); error-code and sets the string-pointer at *addinfop to point to a string containing additional information about the error that occurred: for - example, if the error code is 15 (``Illegal or unsupported index + example, if the error code is 15 (``Illegal or unsupported context set''), the additional information is the name of the requested - index set that was not recognised. + context set that was not recognised. 
The SRW error-codes may be translated into brief human-readable @@ -1228,7 +1219,7 @@ int cql_transform_FILE(cql_transform_t ct, - Specification of CQL to RPN mapping + Specification of CQL to RPN mappings The file supplied to functions cql_transform_open_FILE, @@ -1257,26 +1248,37 @@ int cql_transform_FILE(cql_transform_t ct, The following CQL patterns are recognized: - qualifier.set.name + index.set.name - This pattern is invoked when a CQL qualifier, such as + This pattern is invoked when a CQL index, such as dc.title is converted. set - and name is the index set and qualifier + and name are the context set and index name respectively. Typically, the RPN specifies an equivalent use attribute. - For terms not bound by a qualifier the pattern - qualifier.srw.serverChoice is used. - Here, the prefix srw is defined as - http://www.loc.gov/zing/cql/srw-indexes/v1.0/. + For terms not bound by an index the pattern + index.cql.serverChoice is used. + Here, the prefix cql is defined as + http://www.loc.gov/zing/cql/cql-indexes/v1.0/. If this pattern is not defined, the mapping will fail. + qualifier.set.name + (DEPRECATED) + + + + For backwards compatibility, this is recognised as a synonym of + index.set.name + + + + relation.relation @@ -1358,10 +1360,10 @@ int cql_transform_FILE(cql_transform_t ct, - This specification defines a CQL index set for a given prefix. + This specification defines a CQL context set for a given prefix. The value on the right hand side is the URI for the set - not RPN. All prefixes used in - qualifier patterns must be defined this way. + index patterns must be defined this way. @@ -1369,16 +1371,16 @@ int cql_transform_FILE(cql_transform_t ct, CQL to RPN mapping file - This simple file defines two index sets, three qualifiers and three + This simple file defines two context sets, three indexes and three relations, a position pattern and a default structure. 
@attr 1=1016 @attr 2=3 @attr 4=1 @attr 3=3 @attr 6=1 "computer" - by rules qualifier.srw.serverChoice, + by rules index.cql.serverChoice, relation.scr, structure.*, position.any. @@ -1601,7 +1603,7 @@ typedef struct oident again, corresponding to the specific OIDs defined by the standard. Refer to the - + Registry of Z39.50 Object Identifiers for the whole list. @@ -1748,7 +1750,7 @@ typedef struct oident release the associated memory again. For the structures describing the Z39.50 PDUs and related structures, it is convenient to use the memory-management system of the &odr; subsystem (see - Using ODR). However, in some circumstances + ). However, in some circumstances where you might otherwise benefit from using a simple nibble memory management system, it may be impractical to use odr_malloc() and odr_reset(). @@ -1798,6 +1800,267 @@ typedef struct oident + + Log + + &yaz; has evolved a fairly complex log system which should be useful both + for debugging &yaz; itself, debugging applications that use &yaz;, and for + production use of those applications. + + + The log functions are declared in header yaz/log.h + and implemented in src/log.c. + Due to name clash with syslog and some math utilities the logging + interface has been modified as of YAZ 2.0.29. The obsolete interface + is still available if in header file yaz/log.h. + The key points of the interface are: + + + void yaz_log(int level, const char *fmt, ...) + + void yaz_log_init(int level, const char *prefix, const char *name); + void yaz_log_init_file(const char *fname); + void yaz_log_init_level(int level); + void yaz_log_init_prefix(const char *prefix); + void yaz_log_time_format(const char *fmt); + void yaz_log_init_max_size(int mx); + + int yaz_log_mask_str(const char *str); + int yaz_log_module_level(const char *name); + + + + The reason for the whole log module is the yaz_log + function. 
It takes a bitmask indicating the log levels, a + printf-like format string, and a variable number of + arguments to log. + + + + The log level is a bit mask that says on which level(s) + the log entry should be made, and optionally sets some behaviour of the + logging. In the most simple cases, it can be one of YLOG_FATAL, + YLOG_DEBUG, YLOG_WARN, YLOG_LOG. Those can be combined with bits + that modify the way the log entry is written: YLOG_ERRNO, + YLOG_NOTIME, YLOG_FLUSH. + Most of the rest of the bits are deprecated, and should not be used. Use + the dynamic log levels instead. + + + + Applications that use &yaz; should not use YLOG_LOG for ordinary + messages, but should make use of the dynamic loglevel system. This consists + of two parts: defining the loglevel and checking it. + + + + To define the log levels, the (main) program should pass a string to + yaz_log_mask_str to define which log levels are to be + logged. This string should be a comma-separated list of log level names, + and can contain both hard-coded names and dynamic ones. The log level + calculation starts with YLOG_DEFAULT_LEVEL and adds a bit + for each word it meets, unless the word starts with a '-', in which case it + clears the bit. If the string 'none' is found, + all bits are cleared. Typically this string comes from the command-line, + often identified by -v. The + yaz_log_mask_str returns a log level that should be + passed to yaz_log_init_level for it to take effect. + + + + Each module should check which log bits it should use, by calling + yaz_log_module_level with a suitable name for the + module. The name is stripped of any preceding path and extension, + so it is quite possible to use __FILE__ for it. If the + name has been passed to yaz_log_mask_str, the routine + returns a non-zero bitmask, which should then be used in subsequent calls + to yaz_log. 
(It can also be tested, so as to avoid unnecessary calls to + yaz_log, in time-critical places, or when the log entry would take time + to construct.) + + + + Yaz uses the following dynamic log levels: + server, session, request, requestdetail for the server + functionality. + zoom for the ZOOM client API. + ztest for the simple test server. + malloc, nmem, odr, eventl for internal debugging of yaz itself. + Of course, any program using yaz is welcome to define as many new ones as + it needs. + + + + By default the log is written to stderr, but this can be changed by a call + to yaz_log_init_file or + yaz_log_init. If the log is directed to a file, the + file size is checked at every write, and if it exceeds the limit given in + yaz_log_init_max_size, the log is rotated. The + rotation keeps one old version (with a .1 appended to + the name). The size defaults to 1GB. Setting it to zero will disable the + rotation feature. + + + + A typical yaz-log looks like this: + 13:23:14-23/11 yaz-ztest(1) [session] Starting session from tcp:127.0.0.1 (pid=30968) + 13:23:14-23/11 yaz-ztest(1) [request] Init from 'YAZ' (81) (ver 2.0.28) OK + 13:23:17-23/11 yaz-ztest(1) [request] Search Z: @attrset Bib-1 foo OK:7 hits + 13:23:22-23/11 yaz-ztest(1) [request] Present: [1] 2+2 OK 2 records returned + 13:24:13-23/11 yaz-ztest(1) [request] Close OK + + + + The log entries start with a time stamp. This can be omitted by setting the + YLOG_NOTIME bit in the loglevel. This way automatic tests + can produce identical log files that are easy to diff. The + format of the time stamp can be set with + yaz_log_time_format, which takes a format string just + like strftime. + + + + Next in a log line comes the prefix, often the name of the program. For + yaz-based servers, it can also contain the session number. Then + come one or more log bits in square brackets, depending on the logging + level set by yaz_log_init_level and the loglevel + passed to yaz_log. 
Finally comes the format + string and additional values passed to yaz_log. + + + + The log level YLOG_LOGLVL, enabled by the string + loglevel, will log all the log-level-affecting + operations. This can come in handy if you need to know what other log + levels would be useful. Grep the logfile for [loglevel]. + + + + The log system is almost independent of the rest of &yaz;; the only + important dependency is on nmem, and that only for + using the semaphore definition there. + + + + The dynamic log levels and log rotation were introduced in &yaz; 2.0.28. At + the same time, the log bit names were changed from + LOG_something to YLOG_something, + to avoid collision with syslog.h. + + + + + MARC + + + YAZ provides a fast utility that decodes MARC records and + encodes them to a variety of output formats. The MARC records must + be encoded in ISO2709. + + + + /* create handler */ + yaz_marc_t yaz_marc_create(void); + /* destroy */ + void yaz_marc_destroy(yaz_marc_t mt); + + /* set XML mode YAZ_MARC_LINE, YAZ_MARC_SIMPLEXML, ... */ + void yaz_marc_xml(yaz_marc_t mt, int xmlmode); + #define YAZ_MARC_LINE 0 + #define YAZ_MARC_SIMPLEXML 1 + #define YAZ_MARC_OAIMARC 2 + #define YAZ_MARC_MARCXML 3 + #define YAZ_MARC_ISO2709 4 + #define YAZ_MARC_XCHANGE 5 + + /* supply iconv handle for character set conversion .. */ + void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd); + + /* set debug level, 0=none, 1=more, 2=even more, .. */ + void yaz_marc_debug(yaz_marc_t mt, int level); + + /* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure. + On success, result in *result with size *rsize. */ + int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, + char **result, int *rsize); + + /* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure. 
+ On success, result in WRBUF */ + int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, + int bsize, WRBUF wrbuf); +]]> + + + A MARC conversion handle must be created by using + yaz_marc_create and destroyed + by calling yaz_marc_destroy. + + + All other functions operate on a yaz_marc_t handle. + The output is specified by a call to yaz_marc_xml. + The xmlmode must be one of + + + YAZ_MARC_LINE + + + A simple line-by-line format suitable for display but not + recommended for further (machine) processing. + + + + + + YAZ_MARC_MARCXML + + + The resulting record is converted to MARCXML. + + + + + + YAZ_MARC_ISO2709 + + + The resulting record is converted to ISO2709 (MARC). + + + + + + + The actual conversion functions are + yaz_marc_decode_buf and + yaz_marc_decode_wrbuf which decode and encode + a MARC record. The former function operates on simple buffers, the + latter stores the resulting record in a WRBUF handle (WRBUF is a simple string + type). + + + Display of MARC record + + The following program snippet illustrates how the MARC API may + be used to convert a MARC record to the line-by-line format: + + + + + +