From 64aa16ad23656f60d1716c0adf47be075a573cc3 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 19 Mar 2012 16:08:32 +0100 Subject: [PATCH] Further work on sort filter . Using x-path for getting score, but this is not yet in use. --- src/filter_sort.cpp | 560 ++++++++++++++++++++++++++++++++++++++++++++++++++- src/filter_sort.hpp | 9 + 2 files changed, 564 insertions(+), 5 deletions(-) diff --git a/src/filter_sort.cpp b/src/filter_sort.cpp index 8202577..700b638 100644 --- a/src/filter_sort.cpp +++ b/src/filter_sort.cpp @@ -21,7 +21,15 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include +#include +#include +#include +#include +#include + #include +#include #include @@ -31,20 +39,289 @@ namespace yf = mp::filter; namespace metaproxy_1 { namespace filter { class Sort::Impl { + friend class Frontend; public: Impl(); ~Impl(); - void process(metaproxy_1::Package & package) const; + void process(metaproxy_1::Package & package); void configure(const xmlNode * ptr, bool test_only, const char *path); private: int m_prefetch; + std::string m_xpath_expr; + std::string m_namespaces; + boost::mutex m_mutex; + boost::condition m_cond_session_ready; + std::map m_clients; + FrontendPtr get_frontend(mp::Package &package); + void release_frontend(mp::Package &package); + }; + class Sort::Record { + friend class RecordList; + Z_NamePlusRecord *npr; + std::string score; + void get_xpath(xmlDoc *doc, const char *namespaces, + const char *expr); + bool register_namespaces(xmlXPathContextPtr xpathCtx, + const char *nsList); + public: + Record(Z_NamePlusRecord *n, const char *namespaces, + const char *expr); + ~Record(); + bool operator < (const Record &rhs); + }; + class Sort::RecordList : boost::noncopyable { + Odr_oid *syntax; + std::list npr_list; + mp::odr m_odr; + std::string namespaces; + std::string xpath_expr; + public: + void add(Z_NamePlusRecord *s); + Z_NamePlusRecord *get(int i); + void sort(); + RecordList(Odr_oid *, std::string namespaces, + std::string xpath_expr); + ~RecordList(); + }; + class Sort::ResultSet : boost::noncopyable { + friend class Frontend; + Odr_int hit_count; + std::list record_lists; + }; + class Sort::Frontend : boost::noncopyable { + friend class Impl; + Impl *m_p; + bool m_is_virtual; + bool m_in_use; + std::map m_sets; + typedef std::map::iterator Sets_it; + void handle_package(mp::Package &package); + void handle_search(mp::Package &package, Z_APDU *apdu_req); + void handle_present(mp::Package &package, Z_APDU *apdu_req); + + void handle_records(mp::Package &package, + Z_APDU *apdu_reqq, + Z_Records *records, + Odr_int start_pos, + ResultSetPtr s, + Odr_oid *syntax, + const char *resultSetId); + public: + Frontend(Impl *impl); + ~Frontend(); }; } } -// define Pimpl wrapper forwarding to Impl +static void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) +{ + xmlNodePtr cur; + int size; + int i; + + assert(output); + size = nodes ? nodes->nodeNr : 0; + + fprintf(output, "Result (%d nodes):\n", size); + for (i = 0; i < size; ++i) { + assert(nodes->nodeTab[i]); + + if (nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) + { + xmlNsPtr ns = (xmlNsPtr)nodes->nodeTab[i]; + cur = (xmlNodePtr)ns->next; + if (cur->ns) + fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", + ns->prefix, ns->href, cur->ns->href, cur->name); + else + fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", + ns->prefix, ns->href, cur->name); + } + else if (nodes->nodeTab[i]->type == XML_ELEMENT_NODE) + { + cur = nodes->nodeTab[i]; + if (cur->ns) + fprintf(output, "= element node \"%s:%s\"\n", + cur->ns->href, cur->name); + else + fprintf(output, "= element node \"%s\"\n", cur->name); + } + else + { + cur = nodes->nodeTab[i]; + fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type); + } + } +} + +bool yf::Sort::Record::register_namespaces(xmlXPathContextPtr xpathCtx, + const char *nsList) +{ + xmlChar* nsListDup; + xmlChar* prefix; + xmlChar* href; + xmlChar* next; + + assert(xpathCtx); + assert(nsList); + + nsListDup = xmlStrdup((const xmlChar *) nsList); + if (!nsListDup) + return false; + + next = nsListDup; + while (next) + { + /* skip spaces */ + while (*next == ' ') + next++; + if (*next == '\0') + break; + + /* find prefix */ + prefix = next; + next = (xmlChar *) xmlStrchr(next, '='); + if (next == NULL) + { + xmlFree(nsListDup); + return false; + } + *next++ = '\0'; + + /* find href */ + href = next; + next = (xmlChar*)xmlStrchr(next, ' '); + if (next) + *next++ = '\0'; + + /* do register namespace */ + if (xmlXPathRegisterNs(xpathCtx, prefix, href) != 0) + { + xmlFree(nsListDup); + return false; + } + } + + xmlFree(nsListDup); + return true; +} + + + +void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces, + const char *expr) +{ + xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); + if (xpathCtx) + { + register_namespaces(xpathCtx, namespaces); + xmlXPathObjectPtr xpathObj = + xmlXPathEvalExpression((const xmlChar *) expr, xpathCtx); + if (xpathObj) + { + print_xpath_nodes(xpathObj->nodesetval, stdout); + + xmlXPathFreeObject(xpathObj); + } + xmlXPathFreeContext(xpathCtx); + } +} + +yf::Sort::Record::Record(Z_NamePlusRecord *n, + const char *namespaces, + const char *expr) : npr(n) +{ + if (npr->which == Z_NamePlusRecord_databaseRecord) + { + Z_External *ext = npr->u.databaseRecord; + + if (ext->which == Z_External_octet && + !oid_oidcmp(ext->direct_reference, yaz_oid_recsyn_xml)) + { + xmlDoc *doc = xmlParseMemory( + (const char *) ext->u.octet_aligned->buf, + ext->u.octet_aligned->len); + if (doc) + { + get_xpath(doc, namespaces, expr); + xmlFreeDoc(doc); + } + } + } +} + +yf::Sort::Record::~Record() +{ +} + +bool yf::Sort::Record::operator < (const Record &rhs) +{ + int l_score = 0; + const char *l_database = this->npr->databaseName; + if (l_database) + { + const char *cp = strstr(l_database, ";score="); + if (cp) + l_score = atoi(cp + 7); + } + + int r_score = 0; + const char *r_database = rhs.npr->databaseName; + if (r_database) + { + const char *cp = strstr(r_database, ";score="); + if (cp) + r_score = atoi(cp + 7); + } + + if (l_score < r_score) + return true; + else + return false; +} + +yf::Sort::RecordList::RecordList(Odr_oid *syntax, + std::string a_namespaces, + std::string a_xpath_expr) + : namespaces(a_namespaces), xpath_expr(a_xpath_expr) + +{ + if (syntax) + this->syntax = odr_oiddup(m_odr, syntax); + else + this->syntax = 0; +} + +yf::Sort::RecordList::~RecordList() +{ + +} +void yf::Sort::RecordList::add(Z_NamePlusRecord *s) +{ + ODR oi = m_odr; + yaz_log(YLOG_LOG, "Adding to recordList %p", this); + Record record(yaz_clone_z_NamePlusRecord(s, oi->mem), + namespaces.c_str(), + xpath_expr.c_str()); + npr_list.push_back(record); +} + +Z_NamePlusRecord *yf::Sort::RecordList::get(int i) +{ + std::list::const_iterator it = npr_list.begin(); + for (; it != npr_list.end(); it++, --i) + if (i <= 0) + return it->npr; + return 0; +} + +void yf::Sort::RecordList::sort() +{ + npr_list.sort(); +} + yf::Sort::Sort() : m_p(new Impl) { } @@ -64,6 +341,17 @@ void yf::Sort::process(mp::Package &package) const m_p->process(package); } + +yf::Sort::Frontend::Frontend(Impl *impl) : + m_p(impl), m_is_virtual(false), m_in_use(true) +{ +} + +yf::Sort::Frontend::~Frontend() +{ +} + + yf::Sort::Impl::Impl() : m_prefetch(20) { } @@ -72,15 +360,277 @@ yf::Sort::Impl::~Impl() { } -void yf::Sort::Impl::configure(const xmlNode *xmlnode, bool test_only, +yf::Sort::FrontendPtr yf::Sort::Impl::get_frontend(mp::Package &package) +{ + boost::mutex::scoped_lock lock(m_mutex); + + std::map::iterator it; + + while(true) + { + it = m_clients.find(package.session()); + if (it == m_clients.end()) + break; + + if (!it->second->m_in_use) + { + it->second->m_in_use = true; + return it->second; + } + m_cond_session_ready.wait(lock); + } + FrontendPtr f(new Frontend(this)); + m_clients[package.session()] = f; + f->m_in_use = true; + return f; +} + +void yf::Sort::Impl::release_frontend(mp::Package &package) +{ + boost::mutex::scoped_lock lock(m_mutex); + std::map::iterator it; + + it = m_clients.find(package.session()); + if (it != m_clients.end()) + { + if (package.session().is_closed()) + { + m_clients.erase(it); + } + else + { + it->second->m_in_use = false; + } + m_cond_session_ready.notify_all(); + } +} + +void yf::Sort::Impl::configure(const xmlNode *ptr, bool test_only, const char *path) { + for (ptr = ptr->children; ptr; ptr = ptr->next) + { + if (ptr->type != XML_ELEMENT_NODE) + continue; + if (!strcmp((const char *) ptr->name, "config")) + { + const struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!strcmp((const char *) attr->name, "prefetch")) + { + m_prefetch = mp::xml::get_int(attr->children, -1); + if (m_prefetch < 0) + { + throw mp::filter::FilterException( + "Bad or missing value for attribute " + + std::string((const char *) attr->name)); + } + } + else if (!strcmp((const char *) attr->name, "xpath")) + { + m_xpath_expr = mp::xml::get_text(attr->children); + } + else if (!strcmp((const char *) attr->name, "namespaces")) + { + m_namespaces = mp::xml::get_text(attr->children); + } + else + throw mp::filter::FilterException( + "Bad attribute " + + std::string((const char *) attr->name)); + } + } + else + { + throw mp::filter::FilterException + ("Bad element " + + std::string((const char *) ptr->name) + + " in sort filter"); + } + } + if (m_xpath_expr.length() == 0) + { + throw mp::filter::FilterException + ("Missing xpath attribute for config element in sort filter"); + } + } -void yf::Sort::Impl::process(mp::Package &package) const +void yf::Sort::Frontend::handle_records(mp::Package &package, + Z_APDU *apdu_req, + Z_Records *records, + Odr_int start_pos, + ResultSetPtr s, + Odr_oid *syntax, + const char *resultSetId) { - // Z_GDU *gdu = package.request().get(); + if (records && records->which == Z_Records_DBOSD && start_pos == 1) + { + Z_NamePlusRecordList *nprl = records->u.databaseOrSurDiagnostics; + int i; // i is number of records fetched in last response + + int pos = 1; + RecordListPtr rlp(new RecordList(syntax, + m_p->m_namespaces.c_str(), + m_p->m_xpath_expr.c_str())); + for (i = 0; i < nprl->num_records; i++, pos++) + rlp->add(nprl->records[i]); + + int end_pos = m_p->m_prefetch; + if (end_pos > s->hit_count) + end_pos = s->hit_count; + while (i && pos <= end_pos) + { + mp::odr odr; + i = 0; + + Package present_package(package.session(), package.origin()); + present_package.copy_filter(package); + + Z_APDU *p_apdu = zget_APDU(odr, Z_APDU_presentRequest); + Z_PresentRequest *p_req = p_apdu->u.presentRequest; + + *p_req->resultSetStartPoint = pos; + *p_req->numberOfRecordsRequested = end_pos - pos + 1; + p_req->preferredRecordSyntax = syntax; + p_req->resultSetId = odr_strdup(odr, resultSetId); + + present_package.request() = p_apdu; + present_package.move(); + + Z_GDU *gdu_res = present_package.response().get(); + if (gdu_res && gdu_res->which == Z_GDU_Z3950 && + gdu_res->u.z3950->which == Z_APDU_presentResponse) + { + Z_PresentResponse *res = gdu_res->u.z3950->u.presentResponse; + Z_Records *records = res->records; + if (records && records->which == Z_Records_DBOSD) + { + Z_NamePlusRecordList *nprl = + records->u.databaseOrSurDiagnostics; + for (i = 0; i < nprl->num_records; i++, pos++) + rlp->add(nprl->records[i]); + } + } + } + s->record_lists.push_back(rlp); + rlp->sort(); + + for (i = 0; i < nprl->num_records; i++) + nprl->records[i] = rlp->get(i); + } +} + +void yf::Sort::Frontend::handle_search(mp::Package &package, Z_APDU *apdu_req) +{ + Z_SearchRequest *req = apdu_req->u.searchRequest; + std::string resultSetId = req->resultSetName; + Package b_package(package.session(), package.origin()); + mp::odr odr; + + b_package.copy_filter(package); + Sets_it sets_it = m_sets.find(req->resultSetName); + if (sets_it != m_sets.end()) + { + // result set already exist + // if replace indicator is off: we return diagnostic if + // result set already exist. + if (*req->replaceIndicator == 0) + { + Z_APDU *apdu = + odr.create_searchResponse( + apdu_req, + YAZ_BIB1_RESULT_SET_EXISTS_AND_REPLACE_INDICATOR_OFF, + 0); + package.response() = apdu; + return; + } + m_sets.erase(resultSetId); + } + ResultSetPtr s(new ResultSet); + m_sets[resultSetId] = s; + package.move(); + Z_GDU *gdu_res = package.response().get(); + if (gdu_res && gdu_res->which == Z_GDU_Z3950 && gdu_res->u.z3950->which == + Z_APDU_searchResponse) + { + Z_SearchResponse *res = gdu_res->u.z3950->u.searchResponse; + s->hit_count = *res->resultCount; + handle_records(b_package, apdu_req, res->records, 1, s, + req->preferredRecordSyntax, resultSetId.c_str()); + } +} + +void yf::Sort::Frontend::handle_present(mp::Package &package, Z_APDU *apdu_req) +{ + Z_PresentRequest *req = apdu_req->u.presentRequest; + std::string resultSetId = req->resultSetId; + Package b_package(package.session(), package.origin()); + mp::odr odr; + + b_package.copy_filter(package); + Sets_it sets_it = m_sets.find(resultSetId); + if (sets_it == m_sets.end()) + { + Z_APDU *apdu = + odr.create_presentResponse( + apdu_req, + YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, + resultSetId.c_str()); + package.response() = apdu; + return; + } package.move(); + Z_GDU *gdu_res = package.response().get(); + if (gdu_res && gdu_res->which == Z_GDU_Z3950 && gdu_res->u.z3950->which == + Z_APDU_presentResponse) + { + Z_PresentResponse *res = gdu_res->u.z3950->u.presentResponse; + handle_records(b_package, apdu_req, res->records, + *req->resultSetStartPoint, sets_it->second, + req->preferredRecordSyntax, resultSetId.c_str()); + } +} + +void yf::Sort::Frontend::handle_package(mp::Package &package) +{ + Z_GDU *gdu = package.request().get(); + if (gdu && gdu->which == Z_GDU_Z3950) + { + Z_APDU *apdu_req = gdu->u.z3950; + switch (apdu_req->which) + { + case Z_APDU_searchRequest: + handle_search(package, apdu_req); + return; + case Z_APDU_presentRequest: + handle_present(package, apdu_req); + return; + } + } + package.move(); +} + +void yf::Sort::Impl::process(mp::Package &package) +{ + FrontendPtr f = get_frontend(package); + Z_GDU *gdu = package.request().get(); + + if (f->m_is_virtual) + { + f->handle_package(package); + } + else if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which == + Z_APDU_initRequest) + { + package.move(); + f->m_is_virtual = true; + } + else + package.move(); + + release_frontend(package); } diff --git a/src/filter_sort.hpp b/src/filter_sort.hpp index 5e660f1..b1585d5 100644 --- a/src/filter_sort.hpp +++ b/src/filter_sort.hpp @@ -20,6 +20,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define FILTER_SORT_HPP #include +#include #include @@ -27,6 +28,14 @@ namespace metaproxy_1 { namespace filter { class Sort : public Base { class Impl; + class Frontend; + class ResultSet; + class RecordList; + class Record; + typedef boost::shared_ptr FrontendPtr; + typedef boost::shared_ptr ResultSetPtr; + typedef boost::shared_ptr RecordListPtr; + boost::scoped_ptr m_p; public: Sort(); -- 1.7.10.4