X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=doc%2Ftools.xml;h=25443163387e6530d0e98596f730d7f5b17896b3;hp=4b0d62bba5f334c7c6b4cf4933e57730209bec11;hb=9fc96078f8d79ad107515c3e450900cce1bf8605;hpb=c7cff6778b8d69f6e3c41dbfd939f44436911b90 diff --git a/doc/tools.xml b/doc/tools.xml index 4b0d62b..2544316 100644 --- a/doc/tools.xml +++ b/doc/tools.xml @@ -1,6 +1,5 @@ - Supporting Tools - + In support of the service API - primarily the ASN module, which provides the pro-grammatic interface to the Z39.50 APDUs, &yaz; contains @@ -41,7 +40,7 @@ - The PQF is defined by the pquery module in the YAZ library. + The PQF is defined by the pquery module in the YAZ library. There are two sets of function that have similar behavior. First set operates on a PQF parser handle, second set doesn't. First set set of functions are more flexible than the second set. Second set @@ -53,17 +52,16 @@ #include <yaz/pquery.h> - YAZ_PQF_Parser yaz_pqf_create (void); + YAZ_PQF_Parser yaz_pqf_create(void); - void yaz_pqf_destroy (YAZ_PQF_Parser p); + void yaz_pqf_destroy(YAZ_PQF_Parser p); - Z_RPNQuery *yaz_pqf_parse (YAZ_PQF_Parser p, ODR o, const char *qbuf); + Z_RPNQuery *yaz_pqf_parse(YAZ_PQF_Parser p, ODR o, const char *qbuf); - Z_AttributesPlusTerm *yaz_pqf_scan (YAZ_PQF_Parser p, ODR o, + Z_AttributesPlusTerm *yaz_pqf_scan(YAZ_PQF_Parser p, ODR o, Odr_oid **attributeSetId, const char *qbuf); - - int yaz_pqf_error (YAZ_PQF_Parser p, const char **msg, size_t *off); + int yaz_pqf_error(YAZ_PQF_Parser p, const char **msg, size_t *off); A PQF parser is created and destructed by functions @@ -74,7 +72,7 @@ a Z39.50 RPN Query is returned which is created using ODR stream o. If parsing failed, a NULL pointer is returned. - Function yaz_pqf_scan takes a scan query in + Function yaz_pqf_scan takes a scan query in qbuf. 
If parsing was successful, the function returns attributes plus term pointer and modifies attributeSetId to hold attribute set for the @@ -92,12 +90,12 @@ #include <yaz/pquery.h> - Z_RPNQuery *p_query_rpn (ODR o, oid_proto proto, const char *qbuf); + Z_RPNQuery *p_query_rpn(ODR o, oid_proto proto, const char *qbuf); - Z_AttributesPlusTerm *p_query_scan (ODR o, oid_proto proto, + Z_AttributesPlusTerm *p_query_scan(ODR o, oid_proto proto, Odr_oid **attributeSetP, const char *qbuf); - int p_query_attset (const char *arg); + int p_query_attset(const char *arg); The function p_query_rpn() takes as arguments an @@ -111,7 +109,7 @@ If the parse went well, p_query_rpn() returns a pointer to a Z_RPNQuery structure which can be - placed directly into a Z_SearchRequest. + placed directly into a Z_SearchRequest. If parsing failed, due to syntax error, a NULL pointer is returned. @@ -171,7 +169,7 @@ - The @attr operator is followed by an attribute specification + The @attr operator is followed by an attribute specification (attr-spec above). The specification consists of an optional attribute set, an attribute type-value pair and a sub-query. The attribute type-value pair is packed in one string: @@ -195,7 +193,7 @@ is used. This is the only encoding allowed in both versions 2 and 3 of the Z39.50 standard. - + Using Proximity Operators with PQF @@ -222,7 +220,7 @@ The proximity operator @prox is a special and more restrictive version of the conjunction operator - @and. Its semantics are described in + @and. Its semantics are described in section 3.7.2 (Proximity) of Z39.50 the standard itself, which can be read on-line at @@ -402,9 +400,9 @@ @or @and bob dylan @set Result-1 - + @attr 4=1 @and @attr 1=1 "bob dylan" @attr 1=4 "slow train coming" - + @and @attr 2=4 @attr gils 1=2038 -114 @attr 2=2 @attr gils 1=2039 -109 @@ -435,17 +433,6 @@ symbolic language for expressing boolean query structures. 
- - The EUROPAGATE research project working under the Libraries programme - of the European Commission's DG XIII has, amongst other useful tools, - implemented a general-purpose CCL parser which produces an output - structure that can be trivially converted to the internal RPN - representation of &yaz; (The Z_RPNQuery structure). - Since the CCL utility - along with the rest of the software - produced by EUROPAGATE - is made freely available on a liberal - license, it is included as a supplement to &yaz;. - - CCL Syntax @@ -495,45 +482,51 @@ -- Proximity operator - + CCL queries The following queries are all valid: - + dylan - + "bob dylan" - + dylan or zimmerman - + set=1 - + (dylan and bob) or set=1 - + + righttrunc? + + "notrunc?" + + singlechar#mask + Assuming that the qualifiers ti, au and date are defined we may use: - + ti=self portrait - + au=(bob dylan and slow train coming) date>1980 and (ti=((self portrait))) - + - + CCL Qualifiers - + Qualifiers are used to direct the search to a particular searchable index, such as title (ti) and author indexes (au). The CCL standard @@ -559,13 +552,13 @@ A qualifier specification is of the form: - + - qualifier-name + qualifier-name [attributeset,]type=val - [attributeset,]type=val ... + [attributeset,]type=val ... - + where qualifier-name is the name of the qualifier to be used (eg. ti), @@ -574,7 +567,7 @@ val is attribute value. The type can be specified as an integer or as it be specified either as a single-letter: - u for use, + u for use, r for relation,p for position, s for structure,t for truncation or c for completeness. @@ -652,14 +645,14 @@ - Refer to the complete + Refer to or the complete list of Bib-1 attributes - It is also possible to specify non-numeric attribute values, + It is also possible to specify non-numeric attribute values, which are used in combination with certain types. The special combinations are: - + Special attribute combos @@ -684,26 +677,37 @@ This does not set the structure at all. 
- + s=ol Each token in the term is ORed. (or-list). This does not set the structure at all. - + + s=ag + Tokens that appear as phrases (with blank in them) get + structure phrase attached. Tokens that appear as words + get structure word attached. Phrases and words are + ANDed. This is a variant of s=al and s=pw, with the main + difference that words are not split (with operator AND) + but instead kept in one RPN token. This facility appeared + in YAZ 4.2.38. + + + r=o Allows ranges and the operators greater-than, less-than, ... equals. This sets Bib-1 relation attribute accordingly (relation ordered). A query construct is only treated as a range if dash is used and that is surrounded by white-space. So - -1980 is treated as term + -1980 is treated as term "-1980" not <= 1980. If - 1980 is used, however, that is treated as a range. - + r=r Similar to r=o but assumes that terms are non-negative (not prefixed with -). @@ -715,27 +719,27 @@ r=r is available in YAZ 2.0.24 or later. - + t=l Allows term to be left-truncated. If term is of the form ?x, the resulting Type-1 term is x and truncation is left. - + t=r Allows term to be right-truncated. If term is of the form x?, the resulting Type-1 term is x and truncation is right. - + t=n If term does not include ?, the truncation attribute is set to none (100). - + t=b Allows term to be both left&right truncated. If term is of the form ?x?, the @@ -743,6 +747,24 @@ set to both left&right. + + t=x + Allows masking anywhere in a term, thus fully supporting + # (mask one character) and ? (zero or more of any). + If masking is used, truncation is set to 102 (regexp-1 in term) + and the term is converted accordingly to a regular expression. + + + + t=z + Allows masking anywhere in a term, thus fully supporting + # (mask one character) and ? (zero or more of any).
+ If masking is used, truncation is set to 104 (Z39.58 in term) + and the term is converted accordingly to Z39.58 masking term - + actually the same truncation as CCL itself. + + +
@@ -751,7 +773,7 @@ Consider the following definition: - + ti u=4 s=1 au u=1 s=1 @@ -760,7 +782,7 @@ date u=30 r=o - ti and au both set + ti and au both set structure attribute to phrase (s=1). ti sets the use-attribute to 4. au sets the @@ -773,7 +795,7 @@ You can combine attributes. To Search for "ranked title" you - can do + can do ti,ranked=knuth computer @@ -798,12 +820,12 @@ A qualifier alias is of the form: - q + q q1 q2 .. which declares q to - be an alias for q1, + be an alias for q1, q2... such that the CCL query q=x is equivalent to q1=x or q2=x or .... @@ -856,10 +878,10 @@ case - Specificies if CCL operatores and qualifiers should be - compared with case sensitivity or not. Specify 0 for - case sensitive; 1 for case insensitive. - 0 + Specifies if CCL operators and qualifiers should be + compared with case sensitivity or not. Specify 1 for + case sensitive; 0 for case insensitive. + 1 @@ -911,8 +933,8 @@ To parse a simple string with a FIND query use the function -struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, - int *error, int *pos); +struct ccl_rpn_node *ccl_find_str(CCL_bibset bibset, const char *str, + int *error, int *pos); which takes the CCL profile (bibset) and query @@ -966,7 +988,7 @@ struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, - If you are new to CQL, read the + If you are new to CQL, read the Gentle Introduction. @@ -1049,7 +1071,7 @@ int cql_parser_stdio(CQL_parser cp, FILE *f); uses a FILE handle which is opened for reading.
- + CQL tree If the query string is valid, the CQL parser @@ -1064,12 +1086,13 @@ struct cql_node *cql_parser_result(CQL_parser cp); a pointer to the root node of the resulting tree. - Each node in a CQL tree is represented by a + Each node in a CQL tree is represented by a struct cql_node. It is defined as follows: #define CQL_NODE_ST 1 #define CQL_NODE_BOOL 2 +#define CQL_NODE_SORT 3 struct cql_node { int which; union { @@ -1087,10 +1110,17 @@ struct cql_node { struct cql_node *right; struct cql_node *modifiers; } boolean; + struct { + char *index; + struct cql_node *next; + struct cql_node *modifiers; + struct cql_node *search; + } sort; } u; }; - There are two node types: search term (ST) and boolean (BOOL). + There are three node types: search term (ST), boolean (BOOL) + and sortby (SORT). A modifier is treated as a search term too. @@ -1135,8 +1165,8 @@ struct cql_node { - The boolean node represents both and, - or, not as well as + The boolean node represents and, + or, not + proximity. @@ -1153,12 +1183,16 @@ struct cql_node { + + The sort node represents the SORTBY clause. + + CQL to PQF conversion Conversion to PQF (and Z39.50 RPN) is tricky by the fact that the resulting RPN depends on the Z39.50 target - capabilities (combinations of supported attributes). + capabilities (combinations of supported attributes). In addition, CQL and SRU operate on index prefixes (URI or strings), whereas the RPN uses Object Identifiers for attribute sets. @@ -1176,7 +1210,7 @@ void cql_transform_close(cql_transform_t ct); either an already open FILE or from a filename respectively. - The handle is destroyed by cql_transform_close + The handle is destroyed by cql_transform_close in which case no further reference of the handle is allowed.
@@ -1186,7 +1220,7 @@ void cql_transform_close(cql_transform_t ct); int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max); - This function converts the CQL tree cn + This function converts the CQL tree cn using handle ct. For the resulting PQF, you supply a buffer out which must be able to hold at at least max @@ -1239,7 +1273,7 @@ int cql_transform_FILE(cql_transform_t ct, Specification of CQL to RPN mappings - The file supplied to functions + The file supplied to functions cql_transform_open_FILE, cql_transform_open_fname follows a structure found in many Unix utilities. @@ -1277,7 +1311,7 @@ int cql_transform_FILE(cql_transform_t ct, - This pattern is invoked when a CQL index, such as + This pattern is invoked when a CQL index, such as dc.title is converted. set and name are the context set and index name respectively. @@ -1291,7 +1325,7 @@ int cql_transform_FILE(cql_transform_t ct, If this pattern is not defined, the mapping will fail. - The pattern, + The pattern, index.set.* is used when no other index pattern is matched. @@ -1356,7 +1390,7 @@ int cql_transform_FILE(cql_transform_t ct, This pattern specifies how a CQL structure is mapped to RPN. Note that this CQL pattern is somewhat to similar to - CQL pattern relation. + CQL pattern relation. The type is a CQL relation. @@ -1391,7 +1425,7 @@ int cql_transform_FILE(cql_transform_t ct, This specification defines a CQL context set for a given prefix. - The value on the right hand side is the URI for the set - + The value on the right hand side is the URI for the set - not RPN. All prefixes used in index patterns must be defined this way. @@ -1424,7 +1458,7 @@ int cql_transform_FILE(cql_transform_t ct, index.cql.serverChoice = 1=1016 index.dc.title = 1=4 index.dc.subject = 1=21 - + relation.< = 2=1 relation.eq = 2=3 relation.scr = 2=3 @@ -1519,7 +1553,7 @@ int cql_transform_FILE(cql_transform_t ct, containing XCQL). 
int cql_to_xml_buf(struct cql_node *cn, char *out, int max); -void cql_to_xml(struct cql_node *cn, +void cql_to_xml(struct cql_node *cn, void (*pr)(const char *buf, void *client_data), void *client_data); void cql_to_xml_stdio(struct cql_node *cn, FILE *f); @@ -1535,33 +1569,57 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); a file. + + PQF to CQL conversion + + Conversion from PQF to CQL is offered by the two functions shown + below. The former uses a generic stream for result. The latter + puts result in a WRBUF (string container). + +#include <yaz/rpn2cql.h> + +int cql_transform_rpn2cql_stream(cql_transform_t ct, + void (*pr)(const char *buf, void *client_data), + void *client_data, + Z_RPNQuery *q); + +int cql_transform_rpn2cql_wrbuf(cql_transform_t ct, + WRBUF w, + Z_RPNQuery *q); + + The configuration is the same as used in CQL to PQF conversions. + + Object Identifiers The basic YAZ representation of an OID is an array of integers, - terminated with the value -1. There is a typedef - of this integer to Odr_oid but this is not consistenly - used everywhere. + terminated with the value -1. This integer is of type + Odr_oid. + + + Fundamental OID operations and the type Odr_oid + are defined in yaz/oid_util.h. - An OID can either be declared as a automatic variable or we can - allocated using the ODR/NMEM memory utilities. It's + An OID can either be declared as an automatic variable or it can + be allocated using the memory utilities or ODR/NMEM. It's guaranteed that an OID can fit in OID_SIZE integers.
Create OID on stack We can create an OID for the Bib-1 attribute set with: - int bib1[OID_SIZE]; - myoid[0] = 1; - myoid[1] = 2; - myoid[2] = 840; - myoid[3] = 10003; - myoid[4] = 3; - myoid[5] = 1; - myoid[6] = -1; + Odr_oid bib1[OID_SIZE]; + bib1[0] = 1; + bib1[1] = 2; + bib1[2] = 840; + bib1[3] = 10003; + bib1[4] = 3; + bib1[5] = 1; + bib1[6] = -1; @@ -1569,14 +1627,15 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); An OID may also be filled from a string-based representation using dots (.). This is achieved by function - int oid_dotstring_to_oid(const char *name, int *oid); + int oid_dotstring_to_oid(const char *name, Odr_oid *oid); + This function returns 0 if name could be converted; -1 otherwise. Using oid_dotstring_to_oid - We can create the Bib-1 attribute set OID easier with: + We can fill the Bib-1 attribute set OID more easily with: - int bib1[OID_SIZE]; + Odr_oid bib1[OID_SIZE]; oid_dotstring_to_oid("1.2.840.10003.3.1", bib1); @@ -1604,7 +1663,7 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); The function - char *oid_oid_to_dotstring(const int *oid, char *oidbuf) + char *oid_oid_to_dotstring(const Odr_oid *oid, char *oidbuf) does the reverse of oid_dotstring_to_oid. It converts an OID to the string-based representation using dots. @@ -1620,17 +1679,17 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); Odr_oid *odr_oiddup(ODR odr, const Odr_oid *o); - + OIDs can be compared with oid_oidcmp which returns zero if the two OIDs provided are identical; non-zero otherwise. - + OID database From YAZ version 3 and later, the oident system has been replaced by an OID database. OID database is a misnomer .. the old oident - was a database system too. + system was also a database. The OID database is really just a map between named Object Identifiers @@ -1638,21 +1697,21 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); convert from string to OID or other way around.
- Unfortunately, whenever we supply a string we must also specify the + Unfortunately, whenever we supply a string we must also specify the OID class. The class is necessary because some strings correspond to multiple OIDs. An example of such a string is - Bib-1 which may either be an attribute-set + Bib-1 which may either be an attribute-set or a diagnostic-set. - Applications using the YAZ database should include - yaz/yaz_db.h. + Applications using the YAZ database should include + yaz/oid_db.h. A YAZ database handle is of type yaz_oid_db_t. Actually that's a pointer. You need not deal with that. YAZ has a built-in database which can be considered "constant" for - most purposes. + most purposes. We can get hold of that by using function yaz_oid_std. @@ -1671,7 +1730,7 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); We can create an OID for the Bib-1 attribute set on the ODR stream odr with: - Odr_oid *bib1 = + Odr_oid *bib1 = yaz_string_to_oid_odr(yaz_oid_std(), CLASS_ATTSET, "Bib-1", odr); This is more complex than using odr_getoidbystr. @@ -1679,15 +1738,21 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); string (here Bib-1) is supplied by a user or configuration. - + + + Standard OIDs + All the object identifiers in the standard OID database as returned by yaz_oid_std can be referenced directly in a - program. Each constant OID is prefixed with yaz_oid_ - + program as a constant OID. + Each constant OID is prefixed with yaz_oid_ - followed by OID class (lowercase) - then by OID name (normalized and lowercase). + See for list of all object identifiers + built into YAZ. These are declared in yaz/oid_std.h but are included by yaz/oid_db.h as well. @@ -1702,265 +1767,6 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f);
- - OID oident - - - - The oident utility has been removed from YAZ version 3. This - sub section only applies to YAZ version 2. - - - - - The OID module provides a higher-level representation of the - family of object identifiers which describe the Z39.50 protocol and its - related objects. The definition of the module interface is given in - the oid.h file. - - - - The interface is mainly based on the oident structure. - The definition of this structure looks like this: - - - -typedef struct oident -{ - oid_proto proto; - oid_class oclass; - oid_value value; - int oidsuffix[OID_SIZE]; - char *desc; -} oident; - - - - The proto field takes one of the values - - - - PROTO_Z3950 - PROTO_GENERAL - - - - Use PROTO_Z3950 for Z39.50 Object Identifers, - PROTO_GENERAL for other types (such as - those associated with ILL). - - - - The oclass field takes one of the values - - - - CLASS_APPCTX - CLASS_ABSYN - CLASS_ATTSET - CLASS_TRANSYN - CLASS_DIAGSET - CLASS_RECSYN - CLASS_RESFORM - CLASS_ACCFORM - CLASS_EXTSERV - CLASS_USERINFO - CLASS_ELEMSPEC - CLASS_VARSET - CLASS_SCHEMA - CLASS_TAGSET - CLASS_GENERAL - - - - corresponding to the OID classes defined by the Z39.50 standard. - - Finally, the value field takes one of the values - - - - VAL_APDU - VAL_BER - VAL_BASIC_CTX - VAL_BIB1 - VAL_EXP1 - VAL_EXT1 - VAL_CCL1 - VAL_GILS - VAL_WAIS - VAL_STAS - VAL_DIAG1 - VAL_ISO2709 - VAL_UNIMARC - VAL_INTERMARC - VAL_CCF - VAL_USMARC - VAL_UKMARC - VAL_NORMARC - VAL_LIBRISMARC - VAL_DANMARC - VAL_FINMARC - VAL_MAB - VAL_CANMARC - VAL_SBN - VAL_PICAMARC - VAL_AUSMARC - VAL_IBERMARC - VAL_EXPLAIN - VAL_SUTRS - VAL_OPAC - VAL_SUMMARY - VAL_GRS0 - VAL_GRS1 - VAL_EXTENDED - VAL_RESOURCE1 - VAL_RESOURCE2 - VAL_PROMPT1 - VAL_DES1 - VAL_KRB1 - VAL_PRESSET - VAL_PQUERY - VAL_PCQUERY - VAL_ITEMORDER - VAL_DBUPDATE - VAL_EXPORTSPEC - VAL_EXPORTINV - VAL_NONE - VAL_SETM - VAL_SETG - VAL_VAR1 - VAL_ESPEC1 - - - - again, corresponding to the specific OIDs defined by the standard. 
- Refer to the - - Registry of Z39.50 Object Identifiers for the - whole list. - - - - The desc field contains a brief, mnemonic name for the OID in question. - - - - The function - - - - struct oident *oid_getentbyoid(int *o); - - - - takes as argument an OID, and returns a pointer to a static area - containing an oident structure. You typically use - this function when you receive a PDU containing an OID, and you wish - to branch out depending on the specific OID value. - - - - The function - - - - int *oid_ent_to_oid(struct oident *ent, int *dst); - - - - Takes as argument an oident structure - in which - the proto, oclass/, and - value fields are assumed to be set correctly - - and returns a pointer to a the buffer as given by dst - containing the base - representation of the corresponding OID. The function returns - NULL and the array dst is unchanged if a mapping couldn't place. - The array dst should be at least of size - OID_SIZE. - - - - The oid_ent_to_oid() function can be used whenever - you need to prepare a PDU containing one or more OIDs. The separation of - the protocol element from the remainder of the - OID-description makes it simple to write applications that can - communicate with either Z39.50 or OSI SR-based applications. - - - - The function - - - - oid_value oid_getvalbyname(const char *name); - - - - takes as argument a mnemonic OID name, and returns the - /value field of the first entry in the database that - contains the given name in its desc field. - - - - Three utility functions are provided for translating OIDs' - symbolic names (e.g. Usmarc into OID structures - (int arrays) and strings containing the OID in dotted notation - (e.g. 1.2.840.10003.9.5.1). 
They are: - - - - int *oid_name_to_oid(oid_class oclass, const char *name, int *oid); - char *oid_to_dotstring(const int *oid, char *oidbuf); - char *oid_name_to_dotstring(oid_class oclass, const char *name, char *oidbuf); - - - - oid_name_to_oid() - translates the specified symbolic name, - interpreted as being of class oclass. (The - class must be specified as many symbolic names exist within - multiple classes - for example, Zthes is the - symbolic name of an attribute set, a schema and a tag-set.) The - sequence of integers representing the OID is written into the - area oid provided by the caller; it is the - caller's responsibility to ensure that this area is large enough - to contain the translated OID. As a convenience, the address of - the buffer (i.e. the value of oid) is - returned. - - - oid_to_dotstring() - Translates the int-array oid into a dotted - string which is written into the area oidbuf - supplied by the caller; it is the caller's responsibility to - ensure that this area is large enough. The address of the buffer - is returned. - - - oid_name_to_dotstring() - combines the previous two functions to derive a dotted string - representing the OID specified by oclass and - name, writing it into the buffer passed as - oidbuf and returning its address. - - - - - The OID module has been criticized - and perhaps rightly so - - for needlessly abstracting the - representation of OIDs. Other toolkits use a simple - string-representation of OIDs with good results. In practice, we have - found the interface comfortable and quick to work with, and it is a - simple matter (for what it's worth) to create applications compatible - with both ISO SR and Z39.50. Finally, the use of the - /oident database is by no means mandatory. - You can easily create your own system for representing OIDs, as long - as it is compatible with the low-level integer-array representation - of the ODR module. 
- - - - Nibble Memory @@ -1986,9 +1792,9 @@ typedef struct oident NMEM nmem_create(void); void nmem_destroy(NMEM n); - void *nmem_malloc(NMEM n, int size); + void *nmem_malloc(NMEM n, size_t size); void nmem_reset(NMEM n); - int nmem_total(NMEM n); + size_t nmem_total(NMEM n); void nmem_init(void); void nmem_exit(void); @@ -2016,16 +1822,16 @@ typedef struct oident not call nmem_init or nmem_exit unless you're absolute sure what you're doing. Note that in previous &yaz; versions you'd have to call - nmem_init yourself. + nmem_init yourself.
Log - &yaz; has evolved a fairly complex log system which should be useful both + &yaz; has evolved a fairly complex log system which should be useful both for debugging &yaz; itself, debugging applications that use &yaz;, and for - production use of those applications. + production use of those applications. The log functions are declared in header yaz/log.h @@ -2079,7 +1885,7 @@ typedef struct oident logged. This string should be a comma-separated list of log level names, and can contain both hard-coded names and dynamic ones. The log level calculation starts with YLOG_DEFAULT_LEVEL and adds a bit - for each word it meets, unless the word starts with a '-', in which case it + for each word it meets, unless the word starts with a '-', in which case it clears the bit. If the string 'none' is found, all bits are cleared. Typically this string comes from the command-line, often identified by -v. The @@ -2088,15 +1894,15 @@ typedef struct oident - Each module should check what log bits it should be used, by calling + Each module should check what log bits it should be used, by calling yaz_log_module_level with a suitable name for the module. The name is cleared from a preceding path and an extension, if any, so it is quite possible to use __FILE__ for it. If the name has been passed to yaz_log_mask_str, the routine returns a non-zero bitmask, which should then be used in consequent calls to yaz_log. (It can also be tested, so as to avoid unnecessary calls to - yaz_log, in time-critical places, or when the log entry would take time - to construct.) + yaz_log, in time-critical places, or when the log entry would take time + to construct.) @@ -2158,24 +1964,24 @@ typedef struct oident The log system is almost independent of the rest of &yaz;, the only important dependence is of nmem, and that only for - using the semaphore definition there. + using the semaphore definition there. The dynamic log levels and log rotation were introduced in &yaz; 2.0.28. 
At the same time, the log bit names were changed from - LOG_something to YLOG_something, + LOG_something to YLOG_something, to avoid collision with syslog.h. - + MARC - + - YAZ provides a fast utility that decodes MARC records and - encodes to a varity of output formats. The MARC records must - be encoded in ISO2709. + YAZ provides a fast utility for working with MARC records. + Early versions of the MARC utility only allowed decoding of ISO2709. + Today the utility may both encode - and decode to a variety of formats. @@ -2193,6 +1999,8 @@ typedef struct oident #define YAZ_MARC_MARCXML 3 #define YAZ_MARC_ISO2709 4 #define YAZ_MARC_XCHANGE 5 + #define YAZ_MARC_CHECK 6 + #define YAZ_MARC_TURBOMARC 7 /* supply iconv handle for character set conversion .. */ void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd); @@ -2202,15 +2010,22 @@ typedef struct oident /* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure. On success, result in *result with size *rsize. */ - int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, - char **result, int *rsize); + int yaz_marc_decode_buf(yaz_marc_t mt, const char *buf, int bsize, + const char **result, size_t *rsize); /* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure. On success, result in WRBUF */ - int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, - int bsize, WRBUF wrbuf); + int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, + int bsize, WRBUF wrbuf); ]]> + + + The synopsis is just a basic subset of all functionality. Refer + to the actual header file marcdisp.h for + details. + + A MARC conversion handle must be created by using yaz_marc_create and destroyed @@ -2235,7 +2050,7 @@ typedef struct oident YAZ_MARC_MARCXML - The resulting record is converted to MARCXML. + MARCXML. @@ -2244,14 +2059,45 @@ typedef struct oident YAZ_MARC_ISO2709 - The resulting record is converted to ISO2709 (MARC). + ISO2709 (sometimes just referred to as "MARC").
+ + + YAZ_MARC_XCHANGE + + + MarcXchange. + + + + + + YAZ_MARC_CHECK + + + Pseudo format for validation only. Does not generate + any real output except diagnostics. + + + + + + YAZ_MARC_TURBOMARC + + + XML format with same semantics as MARCXML but more compact + and geared towards fast processing with XSLT. Refer to + the TurboMARC section for more information. + + + + - The actual conversion functions are + The actual conversion functions are yaz_marc_decode_buf and yaz_marc_decode_wrbuf which decode and encode a MARC record. The former function operates on simple buffers, the @@ -2261,13 +2107,13 @@ typedef struct oident Display of MARC record - The followint program snippet illustrates how the MARC API may + The following program snippet illustrates how the MARC API may be used to convert a MARC record to the line-by-line format: + + TurboMARC + + TurboMARC is yet another XML encoding of a MARC record. The format + was designed for fast processing with XSLT. + + + Applications like + Pazpar2 use XSLT to convert an XML encoded MARC record to an internal + representation. This conversion mostly checks the tag of a MARC field + to determine the basic rules in the conversion. This check is + costly when that tag is encoded as an attribute in MARCXML. + Having the tag value as the element name instead makes processing + many times faster (at least for Libxslt). + + + TurboMARC is encoded as follows: + + + Record elements are part of namespace + "http://www.indexdata.com/turbomarc". + + + A record is enclosed in element r. + + + A collection of records is enclosed in element + collection. + + + The leader is encoded as element l with the + leader content as its (text) value. + + + A control field is encoded as element c concatenated + with the tag value of the control field if the tag value + matches the regular expression [a-zA-Z0-9]*.
+ If the tag value does not match the regular expression + [a-zA-Z0-9]* the control field is encoded + as element c and attribute code + will hold the tag value. + This rule ensures that in the rare cases where a tag value might + result in non-well-formed XML, YAZ encodes it as a coded attribute + (as in MARCXML). + + + The control field content is the text value of this element. + Indicators are encoded as attribute names + i1, i2, etc. and + corresponding values for each indicator. + + + A data field is encoded as element d concatenated + with the tag value of the data field or using the attribute + code as described in the rules for control fields. + The children of the data field element are subfield elements. + Each subfield element is encoded as s + concatenated with the sub field code. + The text of the subfield element is the contents of the subfield. + Indicators are encoded as attributes for the data field element similar + to the encoding for control fields. + + + + @@ -2287,7 +2198,7 @@ typedef struct oident YAZ version 2.1.20 or later includes a Retrieval facility tool which allows a SRU/Z39.50 to describe itself and perform record conversions. The idea is the following: - + @@ -2335,13 +2246,13 @@ typedef struct oident Retrieval XML format - All elements should be covered by namespace + All elements should be covered by namespace http://indexdata.com/yaz . The root element node must be retrievalinfo. The retrievalinfo must include one or - more retrieval elements. Each + more retrieval elements. Each retrieval defines a specific combination of syntax, name and identifier supported by this retrieval service. @@ -2363,7 +2274,10 @@ typedef struct oident Defines the name of the retrieval format. This can be any string. For SRU, the value is equivalent to schema (short-hand); - for Z39.50 it's equivalent to simple element set name. + for Z39.50 it's equivalent to simple element set name.
+ For YAZ 3.0.24 and later this name may be specified as a glob + expression with operators + * and ?. @@ -2379,7 +2293,7 @@ typedef struct oident
- The retrieval may include one + The retrieval may include one backend element. If a backend element is given, it specifies how the records are retrieved by some backend and how the records are converted from the backend to @@ -2400,16 +2314,16 @@ typedef struct oident marc - The marc element specifies a conversion - to - and from ISO2709 encoded MARC and - &marcxml;/MarcXchange. + The marc element specifies a conversion + to - and from ISO2709 encoded MARC and + &acro.marcxml;/MarcXchange. The following attributes may be specified: inputformat (REQUIRED) - Format of input. Supported values are + Format of input. Supported values are marc (for ISO2709); and xml for MARCXML/MarcXchange. @@ -2419,8 +2333,8 @@ typedef struct oident outputformat (REQUIRED) - Format of output. Supported values are - line (MARC line format); + Format of output. Supported values are + line (MARC line format); marcxml (for MARCXML), marc (ISO2709), marcxhcange (for MarcXchange). @@ -2456,7 +2370,7 @@ typedef struct oident The xslt element specifies a conversion - via &xslt;. The following attributes may be specified: + via &acro.xslt;. The following attributes may be specified: stylesheet (REQUIRED) @@ -2537,13 +2451,13 @@ typedef struct oident It should be easy to use the retrieval systems from applications. Refer to the headers - yaz/retrieval.h and + yaz/retrieval.h and yaz/record_conv.h.
- +