X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Ffilter_sort.cpp;h=ce0db12ce145a237f6869439ad402093d5223855;hb=586d78659d671683f33ec55f4a7d32b28e345ccd;hp=700b638ad65168c8d636f3c7d70ba0b7f4b8a2fe;hpb=64aa16ad23656f60d1716c0adf47be075a573cc3;p=metaproxy-moved-to-github.git diff --git a/src/filter_sort.cpp b/src/filter_sort.cpp index 700b638..ce0db12 100644 --- a/src/filter_sort.cpp +++ b/src/filter_sort.cpp @@ -1,5 +1,5 @@ /* This file is part of Metaproxy. - Copyright (C) 2005-2012 Index Data + Copyright (C) Index Data Metaproxy is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -50,6 +50,8 @@ namespace metaproxy_1 { int m_prefetch; std::string m_xpath_expr; std::string m_namespaces; + bool m_ascending; + bool m_debug; boost::mutex m_mutex; boost::condition m_cond_session_ready; std::map m_clients; @@ -61,14 +63,14 @@ namespace metaproxy_1 { Z_NamePlusRecord *npr; std::string score; void get_xpath(xmlDoc *doc, const char *namespaces, - const char *expr); + const char *expr, bool debug); bool register_namespaces(xmlXPathContextPtr xpathCtx, const char *nsList); public: Record(Z_NamePlusRecord *n, const char *namespaces, - const char *expr); + const char *expr, bool debug); ~Record(); - bool operator < (const Record &rhs); + bool operator < (const Record &rhs) const; }; class Sort::RecordList : boost::noncopyable { Odr_oid *syntax; @@ -76,12 +78,15 @@ namespace metaproxy_1 { mp::odr m_odr; std::string namespaces; std::string xpath_expr; + bool debug; public: + bool cmp(Odr_oid *syntax); void add(Z_NamePlusRecord *s); - Z_NamePlusRecord *get(int i); + int size(); + Z_NamePlusRecord *get(int i, bool ascending); void sort(); RecordList(Odr_oid *, std::string namespaces, - std::string xpath_expr); + std::string xpath_expr, bool debug); ~RecordList(); }; class Sort::ResultSet : boost::noncopyable { @@ -106,6 +111,7 @@ namespace metaproxy_1 { Odr_int start_pos, ResultSetPtr s, Odr_oid *syntax, + Z_RecordComposition *comp, const char *resultSetId); public: Frontend(Impl *impl); @@ -119,37 +125,37 @@ static void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) xmlNodePtr cur; int size; int i; - + assert(output); size = nodes ? nodes->nodeNr : 0; - + fprintf(output, "Result (%d nodes):\n", size); for (i = 0; i < size; ++i) { assert(nodes->nodeTab[i]); - + if (nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) { xmlNsPtr ns = (xmlNsPtr)nodes->nodeTab[i]; cur = (xmlNodePtr)ns->next; if (cur->ns) - fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", + fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", ns->prefix, ns->href, cur->ns->href, cur->name); else - fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", + fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", ns->prefix, ns->href, cur->name); } else if (nodes->nodeTab[i]->type == XML_ELEMENT_NODE) { - cur = nodes->nodeTab[i]; + cur = nodes->nodeTab[i]; if (cur->ns) - fprintf(output, "= element node \"%s:%s\"\n", + fprintf(output, "= element node \"%s:%s\"\n", cur->ns->href, cur->name); else fprintf(output, "= element node \"%s\"\n", cur->name); } else { - cur = nodes->nodeTab[i]; + cur = nodes->nodeTab[i]; fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type); } } @@ -162,15 +168,15 @@ bool yf::Sort::Record::register_namespaces(xmlXPathContextPtr xpathCtx, xmlChar* prefix; xmlChar* href; xmlChar* next; - + assert(xpathCtx); assert(nsList); nsListDup = xmlStrdup((const xmlChar *) nsList); if (!nsListDup) return false; - - next = nsListDup; + + next = nsListDup; while (next) { /* skip spaces */ @@ -187,13 +193,13 @@ bool yf::Sort::Record::register_namespaces(xmlXPathContextPtr xpathCtx, xmlFree(nsListDup); return false; } - *next++ = '\0'; - + *next++ = '\0'; + /* find href */ href = next; next = (xmlChar*)xmlStrchr(next, ' '); if (next) - *next++ = '\0'; + *next++ = '\0'; /* do register namespace */ if (xmlXPathRegisterNs(xpathCtx, prefix, href) != 0) @@ -202,7 +208,7 @@ bool yf::Sort::Record::register_namespaces(xmlXPathContextPtr xpathCtx, return false; } } - + xmlFree(nsListDup); return true; } @@ -210,18 +216,42 @@ bool yf::Sort::Record::register_namespaces(xmlXPathContextPtr xpathCtx, void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces, - const char *expr) + const char *expr, bool debug) { xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); if (xpathCtx) - { + { register_namespaces(xpathCtx, namespaces); xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression((const xmlChar *) expr, xpathCtx); if (xpathObj) { - print_xpath_nodes(xpathObj->nodesetval, stdout); - + xmlNodeSetPtr nodes = xpathObj->nodesetval; + if (debug) + print_xpath_nodes(nodes, yaz_log_file()); + if (nodes) + { + int i; + for (i = 0; i < nodes->nodeNr; i++) + { + std::string content; + xmlNode *ptr = nodes->nodeTab[i]; + if (ptr->type == XML_ELEMENT_NODE || + ptr->type == XML_ATTRIBUTE_NODE) + { + content = mp::xml::get_text(ptr->children); + } + else if (ptr->type == XML_TEXT_NODE) + { + content = mp::xml::get_text(ptr); + } + if (content.length()) + { + score = content; + break; + } + } + } xmlXPathFreeObject(xpathObj); } xmlXPathFreeContext(xpathCtx); @@ -230,7 +260,8 @@ void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces, yf::Sort::Record::Record(Z_NamePlusRecord *n, const char *namespaces, - const char *expr) : npr(n) + const char *expr, + bool debug) : npr(n) { if (npr->which == Z_NamePlusRecord_databaseRecord) { @@ -244,7 +275,7 @@ yf::Sort::Record::Record(Z_NamePlusRecord *n, ext->u.octet_aligned->len); if (doc) { - get_xpath(doc, namespaces, expr); + get_xpath(doc, namespaces, expr, debug); xmlFreeDoc(doc); } } @@ -255,36 +286,18 @@ yf::Sort::Record::~Record() { } -bool yf::Sort::Record::operator < (const Record &rhs) +bool yf::Sort::Record::operator < (const Record &rhs) const { - int l_score = 0; - const char *l_database = this->npr->databaseName; - if (l_database) - { - const char *cp = strstr(l_database, ";score="); - if (cp) - l_score = atoi(cp + 7); - } - - int r_score = 0; - const char *r_database = rhs.npr->databaseName; - if (r_database) - { - const char *cp = strstr(r_database, ";score="); - if (cp) - r_score = atoi(cp + 7); - } - - if (l_score < r_score) + if (strcmp(this->score.c_str(), rhs.score.c_str()) < 0) return true; - else - return false; + return false; } yf::Sort::RecordList::RecordList(Odr_oid *syntax, std::string a_namespaces, - std::string a_xpath_expr) - : namespaces(a_namespaces), xpath_expr(a_xpath_expr) + std::string a_xpath_expr, + bool a_debug) + : namespaces(a_namespaces), xpath_expr(a_xpath_expr), debug(a_debug) { if (syntax) @@ -295,25 +308,42 @@ yf::Sort::RecordList::RecordList(Odr_oid *syntax, yf::Sort::RecordList::~RecordList() { - + } - + +bool yf::Sort::RecordList::cmp(Odr_oid *syntax) +{ + if ((!this->syntax && !syntax) + || + (this->syntax && syntax && !oid_oidcmp(this->syntax, syntax))) + return true; + return false; +} + +int yf::Sort::RecordList::size() +{ + return npr_list.size(); +} + void yf::Sort::RecordList::add(Z_NamePlusRecord *s) { ODR oi = m_odr; - yaz_log(YLOG_LOG, "Adding to recordList %p", this); - Record record(yaz_clone_z_NamePlusRecord(s, oi->mem), - namespaces.c_str(), - xpath_expr.c_str()); + Z_NamePlusRecord *npr = yaz_clone_z_NamePlusRecord(s, oi->mem); + Record record(npr, namespaces.c_str(), xpath_expr.c_str(), debug); npr_list.push_back(record); } -Z_NamePlusRecord *yf::Sort::RecordList::get(int i) +Z_NamePlusRecord *yf::Sort::RecordList::get(int pos, bool ascending) { std::list::const_iterator it = npr_list.begin(); + int i = pos; + if (!ascending) + i = npr_list.size() - pos - 1; for (; it != npr_list.end(); it++, --i) if (i <= 0) + { return it->npr; + } return 0; } @@ -342,7 +372,7 @@ void yf::Sort::process(mp::Package &package) const } -yf::Sort::Frontend::Frontend(Impl *impl) : +yf::Sort::Frontend::Frontend(Impl *impl) : m_p(impl), m_is_virtual(false), m_in_use(true) { } @@ -352,12 +382,12 @@ yf::Sort::Frontend::~Frontend() } -yf::Sort::Impl::Impl() : m_prefetch(20) +yf::Sort::Impl::Impl() : m_prefetch(20), m_ascending(true), m_debug(false) { } yf::Sort::Impl::~Impl() -{ +{ } yf::Sort::FrontendPtr yf::Sort::Impl::get_frontend(mp::Package &package) @@ -365,13 +395,13 @@ yf::Sort::FrontendPtr yf::Sort::Impl::get_frontend(mp::Package &package) boost::mutex::scoped_lock lock(m_mutex); std::map::iterator it; - + while(true) { it = m_clients.find(package.session()); if (it == m_clients.end()) break; - + if (!it->second->m_in_use) { it->second->m_in_use = true; @@ -389,7 +419,7 @@ void yf::Sort::Impl::release_frontend(mp::Package &package) { boost::mutex::scoped_lock lock(m_mutex); std::map::iterator it; - + it = m_clients.find(package.session()); if (it != m_clients.end()) { @@ -412,8 +442,8 @@ void yf::Sort::Impl::configure(const xmlNode *ptr, bool test_only, { if (ptr->type != XML_ELEMENT_NODE) continue; - if (!strcmp((const char *) ptr->name, "config")) - { + if (!strcmp((const char *) ptr->name, "sort")) + { const struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) { @@ -423,7 +453,7 @@ void yf::Sort::Impl::configure(const xmlNode *ptr, bool test_only, if (m_prefetch < 0) { throw mp::filter::FilterException( - "Bad or missing value for attribute " + + "Bad or missing value for attribute " + std::string((const char *) attr->name)); } } @@ -435,6 +465,14 @@ void yf::Sort::Impl::configure(const xmlNode *ptr, bool test_only, { m_namespaces = mp::xml::get_text(attr->children); } + else if (!strcmp((const char *) attr->name, "ascending")) + { + m_ascending = mp::xml::get_bool(attr->children, true); + } + else if (!strcmp((const char *) attr->name, "debug")) + { + m_debug = mp::xml::get_bool(attr->children, false); + } else throw mp::filter::FilterException( "Bad attribute " + @@ -444,7 +482,7 @@ void yf::Sort::Impl::configure(const xmlNode *ptr, bool test_only, else { throw mp::filter::FilterException - ("Bad element " + ("Bad element " + std::string((const char *) ptr->name) + " in sort filter"); } @@ -463,17 +501,25 @@ void yf::Sort::Frontend::handle_records(mp::Package &package, Odr_int start_pos, ResultSetPtr s, Odr_oid *syntax, + Z_RecordComposition *comp, const char *resultSetId) { if (records && records->which == Z_Records_DBOSD && start_pos == 1) { + std::list::const_iterator it = s->record_lists.begin(); + + for (; it != s->record_lists.end(); it++) + if ((*it)->cmp(syntax)) + return; + Z_NamePlusRecordList *nprl = records->u.databaseOrSurDiagnostics; int i; // i is number of records fetched in last response int pos = 1; RecordListPtr rlp(new RecordList(syntax, m_p->m_namespaces.c_str(), - m_p->m_xpath_expr.c_str())); + m_p->m_xpath_expr.c_str(), + m_p->m_debug)); for (i = 0; i < nprl->num_records; i++, pos++) rlp->add(nprl->records[i]); @@ -495,6 +541,7 @@ void yf::Sort::Frontend::handle_records(mp::Package &package, *p_req->numberOfRecordsRequested = end_pos - pos + 1; p_req->preferredRecordSyntax = syntax; p_req->resultSetId = odr_strdup(odr, resultSetId); + p_req->recordComposition = comp; present_package.request() = p_apdu; present_package.move(); @@ -518,62 +565,80 @@ void yf::Sort::Frontend::handle_records(mp::Package &package, rlp->sort(); for (i = 0; i < nprl->num_records; i++) - nprl->records[i] = rlp->get(i); + nprl->records[i] = rlp->get(i, m_p->m_ascending); } } void yf::Sort::Frontend::handle_search(mp::Package &package, Z_APDU *apdu_req) { - Z_SearchRequest *req = apdu_req->u.searchRequest; + Z_SearchRequest *req = apdu_req->u.searchRequest; std::string resultSetId = req->resultSetName; - Package b_package(package.session(), package.origin()); mp::odr odr; + Odr_oid *syntax = 0; + + if (req->preferredRecordSyntax) + syntax = odr_oiddup(odr, req->preferredRecordSyntax); - b_package.copy_filter(package); Sets_it sets_it = m_sets.find(req->resultSetName); if (sets_it != m_sets.end()) { - // result set already exist + // result set already exist // if replace indicator is off: we return diagnostic if // result set already exist. if (*req->replaceIndicator == 0) { - Z_APDU *apdu = + Z_APDU *apdu = odr.create_searchResponse( apdu_req, YAZ_BIB1_RESULT_SET_EXISTS_AND_REPLACE_INDICATOR_OFF, 0); package.response() = apdu; return; - } + } m_sets.erase(resultSetId); } ResultSetPtr s(new ResultSet); m_sets[resultSetId] = s; - package.move(); - Z_GDU *gdu_res = package.response().get(); + + Package b_package(package.session(), package.origin()); + b_package.copy_filter(package); + b_package.request() = apdu_req; + b_package.move(); + + Z_GDU *gdu_res = b_package.response().get(); if (gdu_res && gdu_res->which == Z_GDU_Z3950 && gdu_res->u.z3950->which == Z_APDU_searchResponse) { Z_SearchResponse *res = gdu_res->u.z3950->u.searchResponse; + Z_RecordComposition *record_comp = + mp::util::piggyback_to_RecordComposition(odr, + *res->resultCount, req); s->hit_count = *res->resultCount; handle_records(b_package, apdu_req, res->records, 1, s, - req->preferredRecordSyntax, resultSetId.c_str()); + syntax, record_comp, resultSetId.c_str()); + package.response() = gdu_res; } + else + package.response() = b_package.response(); + if (b_package.session().is_closed()) + b_package.session().close(); } void yf::Sort::Frontend::handle_present(mp::Package &package, Z_APDU *apdu_req) { - Z_PresentRequest *req = apdu_req->u.presentRequest; + Z_PresentRequest *req = apdu_req->u.presentRequest; std::string resultSetId = req->resultSetId; - Package b_package(package.session(), package.origin()); mp::odr odr; + Odr_oid *syntax = 0; + Odr_int start = *req->resultSetStartPoint; + + if (req->preferredRecordSyntax) + syntax = odr_oiddup(odr, req->preferredRecordSyntax); - b_package.copy_filter(package); Sets_it sets_it = m_sets.find(resultSetId); if (sets_it == m_sets.end()) { - Z_APDU *apdu = + Z_APDU *apdu = odr.create_presentResponse( apdu_req, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, @@ -581,16 +646,61 @@ void yf::Sort::Frontend::handle_present(mp::Package &package, Z_APDU *apdu_req) package.response() = apdu; return; } - package.move(); + ResultSetPtr rset = sets_it->second; + std::list::const_iterator it = rset->record_lists.begin(); + for (; it != rset->record_lists.end(); it++) + if ((*it)->cmp(req->preferredRecordSyntax)) + { + if (*req->resultSetStartPoint - 1 + *req->numberOfRecordsRequested + <= (*it)->size()) + { + int i; + Z_APDU *p_apdu = zget_APDU(odr, Z_APDU_presentResponse); + Z_PresentResponse *p_res = p_apdu->u.presentResponse; + + *p_res->nextResultSetPosition = *req->resultSetStartPoint + + *req->numberOfRecordsRequested; + *p_res->numberOfRecordsReturned = + *req->numberOfRecordsRequested; + p_res->records = (Z_Records *) + odr_malloc(odr, sizeof(*p_res->records)); + p_res->records->which = Z_Records_DBOSD; + Z_NamePlusRecordList *nprl = (Z_NamePlusRecordList *) + odr_malloc(odr, sizeof(*nprl)); + p_res->records->u.databaseOrSurDiagnostics = nprl; + nprl->num_records = *req->numberOfRecordsRequested; + nprl->records = (Z_NamePlusRecord **) + odr_malloc(odr, nprl->num_records * sizeof(*nprl->records)); + for (i = 0; i < nprl->num_records; i++) + { + int pos = i + *req->resultSetStartPoint - 1; + nprl->records[i] = (*it)->get(pos, m_p->m_ascending); + } + package.response() = p_apdu; + return; + } + break; + } + + + Package b_package(package.session(), package.origin()); + b_package.copy_filter(package); + b_package.request() = apdu_req; + b_package.move(); Z_GDU *gdu_res = package.response().get(); if (gdu_res && gdu_res->which == Z_GDU_Z3950 && gdu_res->u.z3950->which == Z_APDU_presentResponse) { Z_PresentResponse *res = gdu_res->u.z3950->u.presentResponse; - handle_records(b_package, apdu_req, res->records, - *req->resultSetStartPoint, sets_it->second, - req->preferredRecordSyntax, resultSetId.c_str()); + handle_records(b_package, apdu_req, res->records, + start, rset, syntax, req->recordComposition, + resultSetId.c_str()); + package.response() = gdu_res; } + else + package.response() = b_package.response(); + if (b_package.session().is_closed()) + b_package.session().close(); } void yf::Sort::Frontend::handle_package(mp::Package &package)