X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fmod_dom.c;h=25b6a23427f025082b149038908f4cd904286c95;hb=918ed6b8ab55442925eb485df3cebea180cff983;hp=d42d80b0ecaad8fd5e1943a3cfbc838bb69e0172;hpb=97dc097858772a66c8e90e8b07f77c9c20450131;p=idzebra-moved-to-github.git

diff --git a/index/mod_dom.c b/index/mod_dom.c
index d42d80b..25b6a23 100644
--- a/index/mod_dom.c
+++ b/index/mod_dom.c
@@ -1,4 +1,4 @@
-/* $Id: mod_dom.c,v 1.1 2007-02-07 12:08:54 adam Exp $
+/* $Id: mod_dom.c,v 1.5 2007-02-13 12:19:37 marc Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -101,6 +101,175 @@ struct filter_info {
 
 #define XML_STRCMP(a,b)   strcmp((char*)a, b)
 #define XML_STRLEN(a) strlen((char*)a)
+
+
+/* Writes pi_str into err_str with the marker "->" inserted at the
+   position look, which must point into pi_str.
+   err_str must have room for strlen(pi_str) + 3 bytes; no size is
+   passed in, so no bounds check can be done here. */
+static void format_pi_zebra_err(char *err_str, const char *pi_str, const char *look)
+{
+    size_t off = (size_t) (look - pi_str);
+
+    memcpy(err_str, pi_str, off);
+    memcpy(err_str + off, "->", 2);
+    strcpy(err_str + off + 2, look);
+}
+
+
+/* Copies the token [bval, eval) into buf as a NUL-terminated string.
+   Returns 1 on success, or 0 with buf untouched if the token plus
+   terminator does not fit into buf_size bytes. */
+static int copy_pi_token(char *buf, size_t buf_size,
+                         const char *bval, const char *eval)
+{
+    size_t len = (size_t) (eval - bval);
+
+    if (len >= buf_size)
+        return 0;
+    memcpy(buf, bval, len);
+    buf[len] = '\0';
+    return 1;
+}
+
+
+/*
+use PI parsing like this
+
+  if (!parse_pi_zebra_20(pi_str, err_str))
+      printf("ERROR '%s'\n", err_str);
+
+accepted forms are
+
+  record[ id=VALUE][ rank=VALUE]
+  index[ NAME[:TYPE]]...
+
+Returns 1 if the whole PI was parsed. Otherwise returns 0 and sets
+err_str to pi_str with "->" inserted where parsing stopped, so err_str
+needs room for strlen(pi_str) + 3 bytes. VALUE, NAME and TYPE are
+limited to 255 bytes each; longer tokens are reported as errors.
+*/
+
+static int parse_pi_zebra_20(const char *pi_str, char *err_str)
+{
+    const char *look = pi_str;
+    const char *bval;
+    const char *eval;
+
+    char value[256];
+    char index[256];
+    char type[256];
+
+    *value = '\0';
+    *index = '\0';
+    *type = '\0';
+
+    /* parsing record instruction */
+    if (0 == strncmp(look, "record", 6)){
+        look += 6;
+        printf("record\n");
+
+        if (*look && 0 == strncmp(look, " id=", 4)){
+            look += 4;
+            bval = look;
+            printf(" id=");
+            while (*look && ' ' != *look)
+                look++;
+            eval = look;
+            if (!copy_pi_token(value, sizeof(value), bval, eval)){
+                format_pi_zebra_err(err_str, pi_str, bval);
+                return 0;
+            }
+
+            printf("%s\n", value);
+        }
+
+        if (*look && 0 == strncmp(look, " rank=", 6)){
+            look += 6;
+            bval = look;
+            printf(" rank=");
+            while (*look && ' ' != *look)
+                look++;
+            eval = look;
+            if (!copy_pi_token(value, sizeof(value), bval, eval)){
+                format_pi_zebra_err(err_str, pi_str, bval);
+                return 0;
+            }
+
+            printf("%s\n", value);
+        }
+
+        if (!*look){
+            return 1;
+        }
+        format_pi_zebra_err(err_str, pi_str, look);
+    }
+
+    /* parsing index instruction */
+    else if (0 == strncmp(look, "index", 5)){
+        look += 5;
+        printf("index\n");
+
+        /* parsing all index name/type pairs */
+        while (*look && ' ' == *look && *(look+1)){
+            look++;
+
+            /* index name must not start with ':' or ' ' */
+            if (!*look || ':' == *look || ' ' == *look){
+                format_pi_zebra_err(err_str, pi_str, look);
+                return 0;
+            }
+
+            /* setting name and type to zero */
+            *index = '\0';
+            *type = '\0';
+
+            /* parsing one index name */
+            bval = look;
+            while (*look && ':' != *look && ' ' != *look){
+                look++;
+            }
+            eval = look;
+            if (!copy_pi_token(index, sizeof(index), bval, eval)){
+                format_pi_zebra_err(err_str, pi_str, bval);
+                return 0;
+            }
+
+
+            /* parsing one index type, if existing */
+            if (':' == *look){
+                look++;
+
+                bval = look;
+                while (*look && ' ' != *look){
+                    look++;
+                }
+                eval = look;
+                if (!copy_pi_token(type, sizeof(type), bval, eval)){
+                    format_pi_zebra_err(err_str, pi_str, bval);
+                    return 0;
+                }
+            }
+
+            printf(" %s:%s\n", index, type);
+        }
+
+        if (!*look){
+            return 1;
+        }
+        format_pi_zebra_err(err_str, pi_str, look);
+    }
+
+
+    /* remaining unparsed rest of PI */
+    else {
+        format_pi_zebra_err(err_str, pi_str, look);
+    }
+
+    return 0;
+}
+
+
 static void set_param_str(const char **params, const char *name,
                           const char *value, ODR odr)
 {
@@ -717,14 +886,50 @@ static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
         yaz_log(YLOG_WARN, "dom filter: unknown record type '%s'",
                 type_str);
 }
-
-static int extract_doc(struct filter_info *tinfo, struct filter_input *input,
-                       struct recExtractCtrl *p, xmlDocPtr doc)
+
+
+/* Indexes one already converted document: initialises a RecWord via
+   recctr->init, dumps doc to stdout if recctr->flagShowRecords is set
+   and passes the root element to index_record. Does not free doc. */
+static void extract_doc_alvis(struct filter_info *tinfo,
+                              struct recExtractCtrl *recctr,
+                              xmlDocPtr doc)
 {
-    RecWord recWord;
-    const char *params[10];
+    if (doc){
+        RecWord recWord;
+        xmlChar *buf_out;
+        int len_out;
+        xmlNodePtr root_ptr;
+
+        (*recctr->init)(recctr, &recWord);
+
+        if (recctr->flagShowRecords){
+            xmlDocDumpMemory(doc, &buf_out, &len_out);
+            fwrite(buf_out, len_out, 1, stdout);
+            xmlFree(buf_out);
+        }
+        root_ptr = xmlDocGetRootElement(doc);
+        if (root_ptr)
+            index_record(tinfo, recctr, root_ptr, &recWord);
+        else
+            yaz_log(YLOG_WARN, "No root for index XML record");
+    }
+}
+
+
+/* Runs the input, store (if tinfo->store is set) and extract
+   conversions on doc, indexes the result with extract_doc_alvis and
+   frees it. */
+static int convert_extract_doc(struct filter_info *tinfo,
+                               struct filter_input *input,
+                               struct recExtractCtrl *p,
+                               xmlDocPtr doc)
+
+{
+    /* RecWord recWord; */
     xmlChar *buf_out;
     int len_out;
+    const char *params[10];
     xsltStylesheetPtr last_xsp = 0;
     xmlDocPtr store_doc = 0;
 
@@ -734,8 +939,6 @@ static int extract_doc(struct filter_info *tinfo, struct filter_input *input,
     /* input conversion */
     perform_convert(tinfo, input->convert, params, &doc, 0);
 
-    (*p->init)(p, &recWord);
-
     if (tinfo->store)
     {
         /* store conversion */
@@ -759,24 +962,12 @@ static int extract_doc(struct filter_info *tinfo, struct filter_input *input,
 
     /* extract conversion */
     perform_convert(tinfo, tinfo->extract->convert, params, &doc, 0);
-    if (doc)
-    {
-        xmlNodePtr root_ptr;
-        if (p->flagShowRecords)
-        {
-            xmlDocDumpMemory(doc, &buf_out, &len_out);
-            fwrite(buf_out, len_out, 1, stdout);
-            xmlFree(buf_out);
-        }
-        root_ptr = xmlDocGetRootElement(doc);
-        if (root_ptr)
-            index_record(tinfo, p, root_ptr, &recWord);
-        else
-        {
-            yaz_log(YLOG_WARN, "No root for index XML record");
-        }
+
+    if (doc){
+        extract_doc_alvis(tinfo, p, doc);
         xmlFreeDoc(doc);
-    }
+    }
+
     return RECCTRL_EXTRACT_OK;
 }
 
@@ -794,7 +985,8 @@ static int extract_xml_split(struct filter_info *tinfo,
                            p /* I/O handler */,
                            0 /* URL */,
                            0 /* encoding */,
-                           XML_PARSE_XINCLUDE);
+                           XML_PARSE_XINCLUDE|
+                           XML_PARSE_NOENT);
     }
     if (!input->u.xmlreader.reader)
         return RECCTRL_EXTRACT_ERROR_GENERIC;
@@ -815,7 +1007,7 @@ static int extract_xml_split(struct filter_info *tinfo,
 
             xmlDocSetRootElement(doc, ptr2);
 
-            return extract_doc(tinfo, input, p, doc);
+            return convert_extract_doc(tinfo, input, p, doc);
         }
         else
         {
@@ -840,12 +1032,12 @@ static int extract_xml_full(struct filter_info *tinfo,
         xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
                                   0 /* URL */,
                                   0 /* encoding */,
-                                  XML_PARSE_XINCLUDE);
+                                  XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
         if (!doc)
         {
             return RECCTRL_EXTRACT_ERROR_GENERIC;
         }
-        return extract_doc(tinfo, input, p, doc);
+        return convert_extract_doc(tinfo, input, p, doc);
     }
     else
         return RECCTRL_EXTRACT_EOF;
@@ -899,7 +1091,7 @@ static int extract_iso2709(struct filter_info *tinfo,
         yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 0, 0, 0);
         rdoc = xmlNewDoc((const xmlChar*) "1.0");
         xmlDocSetRootElement(rdoc, root_ptr);
-        return extract_doc(tinfo, input, p, rdoc);
+        return convert_extract_doc(tinfo, input, p, rdoc);
     }
     return RECCTRL_EXTRACT_OK;
 }
@@ -995,7 +1187,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
                     0 /* URL */,
                     0 /* encoding */,
-                    XML_PARSE_XINCLUDE);
+                    XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
     if (!doc)
     {
         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;