Rename from yp2 to metaproxy. The namespace for all definitions
[metaproxy-moved-to-github.git] / src / filter_multi.cpp
index 7e847d4..a03319a 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: filter_multi.cpp,v 1.7 2006-01-18 09:20:30 adam Exp $
-   Copyright (c) 2005, Index Data.
+/* $Id: filter_multi.cpp,v 1.15 2006-03-16 10:40:59 adam Exp $
+   Copyright (c) 2005-2006, Index Data.
 
 %LICENSE%
  */
 #include <yaz/otherinfo.h>
 #include <yaz/diagbib1.h>
 
+#include <vector>
+#include <algorithm>
 #include <map>
 #include <iostream>
 
-namespace yf = yp2::filter;
+namespace mp = metaproxy_1;
+namespace yf = mp::filter;
 
-namespace yp2 {
+namespace metaproxy_1 {
     namespace filter {
 
         struct Multi::BackendSet {
             BackendPtr m_backend;
             int m_count;
             bool operator < (const BackendSet &k) const;
+            bool operator == (const BackendSet &k) const;
+        };
+        struct Multi::ScanTermInfo {
+            std::string m_norm_term;
+            std::string m_display_term;
+            int m_count;
+            bool operator < (const ScanTermInfo &) const;
+            bool operator == (const ScanTermInfo &) const;
+            Z_Entry *get_entry(ODR odr);
         };
         struct Multi::FrontendSet {
             struct PresentJob {
@@ -69,7 +81,8 @@ namespace yp2 {
             void close(Package &package);
             void search(Package &package, Z_APDU *apdu);
             void present(Package &package, Z_APDU *apdu);
-            void scan(Package &package, Z_APDU *apdu);
+            void scan1(Package &package, Z_APDU *apdu);
+            void scan2(Package &package, Z_APDU *apdu);
             Rep *m_p;
         };            
         struct Multi::Map {
@@ -80,7 +93,7 @@ namespace yp2 {
         };
         class Multi::Rep {
             friend class Multi;
-            friend class Frontend;
+            friend struct Frontend;
             
             FrontendPtr get_frontend(Package &package);
             void release_frontend(Package &package);
@@ -90,12 +103,12 @@ namespace yp2 {
             std::map<std::string,std::string> m_target_route;
             boost::mutex m_mutex;
             boost::condition m_cond_session_ready;
-            std::map<yp2::Session, FrontendPtr> m_clients;
+            std::map<mp::Session, FrontendPtr> m_clients;
         };
     }
 }
 
-using namespace yp2;
+using namespace mp;
 
 bool yf::Multi::BackendSet::operator < (const BackendSet &k) const
 {
@@ -116,7 +129,7 @@ yf::Multi::FrontendPtr yf::Multi::Rep::get_frontend(Package &package)
 {
     boost::mutex::scoped_lock lock(m_mutex);
 
-    std::map<yp2::Session,yf::Multi::FrontendPtr>::iterator it;
+    std::map<mp::Session,yf::Multi::FrontendPtr>::iterator it;
     
     while(true)
     {
@@ -140,7 +153,7 @@ yf::Multi::FrontendPtr yf::Multi::Rep::get_frontend(Package &package)
 void yf::Multi::Rep::release_frontend(Package &package)
 {
     boost::mutex::scoped_lock lock(m_mutex);
-    std::map<yp2::Session,yf::Multi::FrontendPtr>::iterator it;
+    std::map<mp::Session,yf::Multi::FrontendPtr>::iterator it;
     
     it = m_clients.find(package.session());
     if (it != m_clients.end())
@@ -202,6 +215,7 @@ void yf::Multi::Backend::operator() (void)
     m_package->move(m_route);
 }
 
+
 void yf::Multi::Frontend::close(Package &package)
 {
     std::list<BackendPtr>::const_iterator bit;
@@ -227,14 +241,9 @@ void yf::Multi::Frontend::multi_move(std::list<BackendPtr> &blist)
     g.join_all();
 }
 
-
 void yf::Multi::FrontendSet::round_robin(int start, int number,
                                          std::list<PresentJob> &jobs)
 {
-    int fetched = 0;
-    int p = 1;
-    bool eof = true;
-
     std::list<int> pos;
     std::list<int> inside_pos;
     std::list<BackendSet>::const_iterator bsit;
@@ -244,39 +253,84 @@ void yf::Multi::FrontendSet::round_robin(int start, int number,
         inside_pos.push_back(0);
     }
 
-    std::list<int>::iterator psit = pos.begin();
-    std::list<int>::iterator esit = inside_pos.begin();
-    bsit = m_backend_sets.begin();
-    while (fetched < number)
+    int p = 1;
+#if 1
+    // optimization step!
+    int omin = 0;
+    while(true)
     {
-        if (bsit == m_backend_sets.end())
+        int min = 0;
+        int no_left = 0;
+        // find min count for each set which is > omin
+        for (bsit = m_backend_sets.begin(); bsit != m_backend_sets.end(); bsit++)
         {
-            psit = pos.begin();
-            esit = inside_pos.begin();
-            bsit = m_backend_sets.begin();
-            if (eof)
-                break;
-            eof = true;
+            if (bsit->m_count > omin)
+            {
+                if (no_left == 0 || bsit->m_count < min)
+                    min = bsit->m_count;
+                no_left++;
+            }
+        }
+        if (no_left == 0) // if nothing greater than omin, bail out.
+            break;
+        int skip = no_left * min;
+        if (p + skip > start)  // step gets us "into" present range?
+        {
+            // Yes. skip until start.. Rounding off is deliberate!
+            min = (start-p) / no_left;
+            p += no_left * min;
+            
+            // update positions in each set..
+            std::list<int>::iterator psit = pos.begin();
+            for (psit = pos.begin(); psit != pos.end(); psit++)
+                *psit += min;
+            break;
         }
-        if (*psit <= bsit->m_count)
+        // skip on each set.. before "present range"..
+        p = p + skip;
+        
+        std::cout << "\nSKIP min=" << min << " no_left=" << no_left << "\n\n";
+        
+        std::list<int>::iterator psit = pos.begin();
+        for (psit = pos.begin(); psit != pos.end(); psit++)
+            *psit += min;
+        
+        omin = min; // update so we consider next class (with higher count)
+    }
+#endif
+    int fetched = 0;
+    bool more = true;
+    while (more)
+    {
+        more = false;
+        std::list<int>::iterator psit = pos.begin();
+        std::list<int>::iterator esit = inside_pos.begin();
+        bsit = m_backend_sets.begin();
+
+        for (; bsit != m_backend_sets.end(); psit++,esit++,bsit++)
         {
-            if (p >= start)
+            if (fetched >= number)
+            {
+                more = false;
+                break;
+            }
+            if (*psit <= bsit->m_count)
             {
-                PresentJob job;
-                job.m_backend = bsit->m_backend;
-                job.m_pos = *psit;
-                job.m_inside_pos = *esit;
-                jobs.push_back(job);
-                (*esit)++;
-                fetched++;
+                if (p >= start)
+                {
+                    PresentJob job;
+                    job.m_backend = bsit->m_backend;
+                    job.m_pos = *psit;
+                    job.m_inside_pos = *esit;
+                    jobs.push_back(job);
+                    (*esit)++;
+                    fetched++;
+                }
+                (*psit)++;
+                p++;
+                more = true;
             }
-            (*psit)++;
-            p++;
-            eof = false;
         }
-        psit++;
-        esit++;
-        bsit++;
     }
 }
 
@@ -286,7 +340,7 @@ void yf::Multi::Frontend::init(Package &package, Z_GDU *gdu)
 
     std::list<std::string> targets;
 
-    yp2::util::get_vhost_otherinfo(&req->otherInfo, false, targets);
+    mp::util::get_vhost_otherinfo(&req->otherInfo, false, targets);
 
     if (targets.size() < 1)
     {
@@ -313,13 +367,15 @@ void yf::Multi::Frontend::init(Package &package, Z_GDU *gdu)
     std::list<BackendPtr>::const_iterator bit;
     for (bit = m_backend_list.begin(); bit != m_backend_list.end(); bit++)
     {
-        yp2::odr odr;
+        mp::odr odr;
         BackendPtr b = *bit;
         Z_APDU *init_apdu = zget_APDU(odr, Z_APDU_initRequest);
         
-        yaz_oi_set_string_oidval(&init_apdu->u.initRequest->otherInfo, odr,
-                                 VAL_PROXY, 1, b->m_vhost.c_str());
-        
+        std::list<std::string>vhost_one;
+        vhost_one.push_back(b->m_vhost);
+        mp::util::set_vhost_otherinfo(&init_apdu->u.initRequest->otherInfo,
+                                       odr, vhost_one);
+
         Z_InitRequest *req = init_apdu->u.initRequest;
         
         ODR_MASK_SET(req->options, Z_Options_search);
@@ -338,7 +394,7 @@ void yf::Multi::Frontend::init(Package &package, Z_GDU *gdu)
     multi_move(m_backend_list);
 
     // create the frontend init response based on each backend init response
-    yp2::odr odr;
+    mp::odr odr;
 
     Z_APDU *f_apdu = odr.create_initResponse(gdu->u.z3950, 0, 0);
     Z_InitResponse *f_resp = f_apdu->u.initResponse;
@@ -412,9 +468,9 @@ void yf::Multi::Frontend::search(Package &package, Z_APDU *apdu_req)
     for (bit = m_backend_list.begin(); bit != m_backend_list.end(); bit++)
     {
         PackagePtr p = (*bit)->m_package;
-        yp2::odr odr;
+        mp::odr odr;
     
-        if (!yp2::util::set_databases_from_zurl(odr, (*bit)->m_vhost,
+        if (!mp::util::set_databases_from_zurl(odr, (*bit)->m_vhost,
                                                 &req->num_databaseNames,
                                                 &req->databaseNames))
         {
@@ -467,7 +523,7 @@ void yf::Multi::Frontend::search(Package &package, Z_APDU *apdu_req)
         }
     }
 
-    yp2::odr odr;
+    mp::odr odr;
     Z_APDU *f_apdu = odr.create_searchResponse(apdu_req, 0, 0);
     Z_SearchResponse *f_resp = f_apdu->u.searchResponse;
 
@@ -483,7 +539,7 @@ void yf::Multi::Frontend::search(Package &package, Z_APDU *apdu_req)
     m_sets[resultSet.m_setname] = resultSet;
 
     int number;
-    yp2::util::piggyback(smallSetUpperBound,
+    mp::util::piggyback(smallSetUpperBound,
                          largeSetLowerBound,
                          mediumSetPresentNumber,
                          result_set_size,
@@ -533,7 +589,7 @@ void yf::Multi::Frontend::present(Package &package, Z_APDU *apdu_req)
     it = m_sets.find(std::string(req->resultSetId));
     if (it == m_sets.end())
     {
-        yp2::odr odr;
+        mp::odr odr;
         Z_APDU *apdu = 
             odr.create_presentResponse(
                 apdu_req,
@@ -616,7 +672,7 @@ void yf::Multi::Frontend::present(Package &package, Z_APDU *apdu_req)
         }
     }
 
-    yp2::odr odr;
+    mp::odr odr;
     Z_APDU *f_apdu = odr.create_presentResponse(apdu_req, 0, 0);
     Z_PresentResponse *f_resp = f_apdu->u.presentResponse;
 
@@ -657,11 +713,11 @@ void yf::Multi::Frontend::present(Package &package, Z_APDU *apdu_req)
     package.response() = f_apdu;
 }
 
-void yf::Multi::Frontend::scan(Package &package, Z_APDU *apdu_req)
+void yf::Multi::Frontend::scan1(Package &package, Z_APDU *apdu_req)
 {
     if (m_backend_list.size() > 1)
     {
-        yp2::odr odr;
+        mp::odr odr;
         Z_APDU *f_apdu = 
             odr.create_scanResponse(
                 apdu_req, YAZ_BIB1_COMBI_OF_SPECIFIED_DATABASES_UNSUPP, 0);
@@ -677,9 +733,9 @@ void yf::Multi::Frontend::scan(Package &package, Z_APDU *apdu_req)
     for (bit = m_backend_list.begin(); bit != m_backend_list.end(); bit++)
     {
         PackagePtr p = (*bit)->m_package;
-        yp2::odr odr;
+        mp::odr odr;
     
-        if (!yp2::util::set_databases_from_zurl(odr, (*bit)->m_vhost,
+        if (!mp::util::set_databases_from_zurl(odr, (*bit)->m_vhost,
                                                 &req->num_databaseNames,
                                                 &req->databaseNames))
         {
@@ -714,6 +770,268 @@ void yf::Multi::Frontend::scan(Package &package, Z_APDU *apdu_req)
     }
 }
 
+bool yf::Multi::ScanTermInfo::operator < (const ScanTermInfo &k) const
+{
+    return m_norm_term < k.m_norm_term;
+}
+
+bool yf::Multi::ScanTermInfo::operator == (const ScanTermInfo &k) const
+{
+    return m_norm_term == k.m_norm_term;
+}
+
+Z_Entry *yf::Multi::ScanTermInfo::get_entry(ODR odr)
+{
+    Z_Entry *e = (Z_Entry *)odr_malloc(odr, sizeof(*e));
+    e->which = Z_Entry_termInfo;
+    Z_TermInfo *t;
+    t = e->u.termInfo = (Z_TermInfo *) odr_malloc(odr, sizeof(*t));
+    t->suggestedAttributes = 0;
+    t->displayTerm = 0;
+    t->alternativeTerm = 0;
+    t->byAttributes = 0;
+    t->otherTermInfo = 0;
+    t->globalOccurrences = odr_intdup(odr, m_count);
+    t->term = (Z_Term *)
+        odr_malloc(odr, sizeof(*t->term));
+    t->term->which = Z_Term_general;
+    Odr_oct *o;
+    t->term->u.general = o = (Odr_oct *)odr_malloc(odr, sizeof(Odr_oct));
+
+    o->len = o->size = m_norm_term.size();
+    o->buf = (unsigned char *) odr_malloc(odr, o->len);
+    memcpy(o->buf, m_norm_term.c_str(), o->len);
+    return e;
+}
+
+void yf::Multi::Frontend::scan2(Package &package, Z_APDU *apdu_req)
+{
+    Z_ScanRequest *req = apdu_req->u.scanRequest;
+
+    int default_num_db = req->num_databaseNames;
+    char **default_db = req->databaseNames;
+
+    std::list<BackendPtr>::const_iterator bit;
+    for (bit = m_backend_list.begin(); bit != m_backend_list.end(); bit++)
+    {
+        PackagePtr p = (*bit)->m_package;
+        mp::odr odr;
+    
+        if (!mp::util::set_databases_from_zurl(odr, (*bit)->m_vhost,
+                                                &req->num_databaseNames,
+                                                &req->databaseNames))
+        {
+            req->num_databaseNames = default_num_db;
+            req->databaseNames = default_db;
+        }
+        p->request() = apdu_req;
+        p->copy_filter(package);
+    }
+    multi_move(m_backend_list);
+
+    ScanTermInfoList entries_before;
+    ScanTermInfoList entries_after;
+    int no_before = 0;
+    int no_after = 0;
+
+    for (bit = m_backend_list.begin(); bit != m_backend_list.end(); bit++)
+    {
+        PackagePtr p = (*bit)->m_package;
+        
+        if (p->session().is_closed()) // if any backend closes, close frontend
+            package.session().close();
+        
+        Z_GDU *gdu = p->response().get();
+        if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which ==
+            Z_APDU_scanResponse)
+        {
+            Z_ScanResponse *res = gdu->u.z3950->u.scanResponse;
+
+            if (res->entries && res->entries->nonsurrogateDiagnostics)
+            {
+                // failure
+                mp::odr odr;
+                Z_APDU *f_apdu = odr.create_scanResponse(apdu_req, 1, 0);
+                Z_ScanResponse *f_res = f_apdu->u.scanResponse;
+
+                f_res->entries->nonsurrogateDiagnostics = 
+                    res->entries->nonsurrogateDiagnostics;
+                f_res->entries->num_nonsurrogateDiagnostics = 
+                    res->entries->num_nonsurrogateDiagnostics;
+
+                package.response() = f_apdu;
+                return;
+            }
+
+            if (res->entries && res->entries->entries)
+            {
+                Z_Entry **entries = res->entries->entries;
+                int num_entries = res->entries->num_entries;
+                int position = 1;
+                if (req->preferredPositionInResponse)
+                    position = *req->preferredPositionInResponse;
+                if (res->positionOfTerm)
+                    position = *res->positionOfTerm;
+
+                // before
+                int i;
+                for (i = 0; i<position-1 && i<num_entries; i++)
+                {
+                    Z_Entry *ent = entries[i];
+
+                    if (ent->which == Z_Entry_termInfo)
+                    {
+                        ScanTermInfo my;
+
+                        int *occur = ent->u.termInfo->globalOccurrences;
+                        my.m_count = occur ? *occur : 0;
+
+                        if (ent->u.termInfo->term->which == Z_Term_general)
+                        {
+                            my.m_norm_term = std::string(
+                                (const char *)
+                                ent->u.termInfo->term->u.general->buf,
+                                ent->u.termInfo->term->u.general->len);
+                        }
+                        if (my.m_norm_term.length())
+                        {
+                            ScanTermInfoList::iterator it = 
+                                entries_before.begin();
+                            while (it != entries_before.end() && my <*it)
+                                it++;
+                            if (my == *it)
+                            {
+                                it->m_count += my.m_count;
+                            }
+                            else
+                            {
+                                entries_before.insert(it, my);
+                                no_before++;
+                            }
+                        }
+                    }
+                }
+                // after
+                if (position <= 0)
+                    i = 0;
+                else
+                    i = position-1;
+                for ( ; i<num_entries; i++)
+                {
+                    Z_Entry *ent = entries[i];
+
+                    if (ent->which == Z_Entry_termInfo)
+                    {
+                        ScanTermInfo my;
+
+                        int *occur = ent->u.termInfo->globalOccurrences;
+                        my.m_count = occur ? *occur : 0;
+
+                        if (ent->u.termInfo->term->which == Z_Term_general)
+                        {
+                            my.m_norm_term = std::string(
+                                (const char *)
+                                ent->u.termInfo->term->u.general->buf,
+                                ent->u.termInfo->term->u.general->len);
+                        }
+                        if (my.m_norm_term.length())
+                        {
+                            ScanTermInfoList::iterator it = 
+                                entries_after.begin();
+                            while (it != entries_after.end() && *it < my)
+                                it++;
+                            if (my == *it)
+                            {
+                                it->m_count += my.m_count;
+                            }
+                            else
+                            {
+                                entries_after.insert(it, my);
+                                no_after++;
+                            }
+                        }
+                    }
+                }
+
+            }                
+        }
+        else
+        {
+            // if any target does not return scan response - return that 
+            package.response() = p->response();
+            return;
+        }
+    }
+
+    if (true)
+    {
+        std::cout << "BEFORE\n";
+        ScanTermInfoList::iterator it = entries_before.begin();
+        for(; it != entries_before.end(); it++)
+        {
+            std::cout << " " << it->m_norm_term << " " << it->m_count << "\n";
+        }
+        
+        std::cout << "AFTER\n";
+        it = entries_after.begin();
+        for(; it != entries_after.end(); it++)
+        {
+            std::cout << " " << it->m_norm_term << " " << it->m_count << "\n";
+        }
+    }
+
+    if (false)
+    {
+        mp::odr odr;
+        Z_APDU *f_apdu = odr.create_scanResponse(apdu_req, 1, "not implemented");
+        package.response() = f_apdu;
+    }
+    else
+    {
+        mp::odr odr;
+        Z_APDU *f_apdu = odr.create_scanResponse(apdu_req, 0, 0);
+        Z_ScanResponse *resp = f_apdu->u.scanResponse;
+        
+        int number_returned = *req->numberOfTermsRequested;
+        int position_returned = *req->preferredPositionInResponse;
+        
+        resp->entries->num_entries = number_returned;
+        resp->entries->entries = (Z_Entry**)
+            odr_malloc(odr, sizeof(Z_Entry*) * number_returned);
+        int i;
+
+        int lbefore = entries_before.size();
+        if (lbefore < position_returned-1)
+            position_returned = lbefore+1;
+
+        ScanTermInfoList::iterator it = entries_before.begin();
+        for (i = 0; i<position_returned-1 && it != entries_before.end(); i++, it++)
+        {
+            resp->entries->entries[position_returned-2-i] = it->get_entry(odr);
+        }
+
+        it = entries_after.begin();
+
+        if (position_returned <= 0)
+            i = 0;
+        else
+            i = position_returned-1;
+        for (; i<number_returned && it != entries_after.end(); i++, it++)
+        {
+            resp->entries->entries[i] = it->get_entry(odr);
+        }
+
+        number_returned = i;
+
+        resp->positionOfTerm = odr_intdup(odr, position_returned);
+        resp->numberOfEntriesReturned = odr_intdup(odr, number_returned);
+        resp->entries->num_entries = number_returned;
+
+        package.response() = f_apdu;
+    }
+}
+
+
 void yf::Multi::process(Package &package) const
 {
     FrontendPtr f = m_p->get_frontend(package);
@@ -732,7 +1050,7 @@ void yf::Multi::process(Package &package) const
         Z_APDU *apdu = gdu->u.z3950;
         if (apdu->which == Z_APDU_initRequest)
         {
-            yp2::odr odr;
+            mp::odr odr;
             
             package.response() = odr.create_close(
                 apdu,
@@ -751,11 +1069,11 @@ void yf::Multi::process(Package &package) const
         }
         else if (apdu->which == Z_APDU_scanRequest)
         {
-            f->scan(package, apdu);
+            f->scan2(package, apdu);
         }
         else
         {
-            yp2::odr odr;
+            mp::odr odr;
             
             package.response() = odr.create_close(
                 apdu, Z_Close_protocolError,
@@ -767,7 +1085,7 @@ void yf::Multi::process(Package &package) const
     m_p->release_frontend(package);
 }
 
-void yp2::filter::Multi::configure(const xmlNode * ptr)
+void mp::filter::Multi::configure(const xmlNode * ptr)
 {
     for (ptr = ptr->children; ptr; ptr = ptr->next)
     {
@@ -775,8 +1093,8 @@ void yp2::filter::Multi::configure(const xmlNode * ptr)
             continue;
         if (!strcmp((const char *) ptr->name, "target"))
         {
-            std::string route = yp2::xml::get_route(ptr);
-            std::string target = yp2::xml::get_text(ptr);
+            std::string route = mp::xml::get_route(ptr);
+            std::string target = mp::xml::get_text(ptr);
             std::cout << "route=" << route << " target=" << target << "\n";
             m_p->m_target_route[target] = route;
         }
@@ -790,18 +1108,18 @@ void yp2::filter::Multi::configure(const xmlNode * ptr)
                 if (v_node->type != XML_ELEMENT_NODE)
                     continue;
                 
-                if (yp2::xml::is_element_yp2(v_node, "vhost"))
-                    vhost = yp2::xml::get_text(v_node);
-                else if (yp2::xml::is_element_yp2(v_node, "target"))
-                    targets.push_back(yp2::xml::get_text(v_node));
+                if (mp::xml::is_element_yp2(v_node, "vhost"))
+                    vhost = mp::xml::get_text(v_node);
+                else if (mp::xml::is_element_yp2(v_node, "target"))
+                    targets.push_back(mp::xml::get_text(v_node));
                 else
-                    throw yp2::filter::FilterException
+                    throw mp::filter::FilterException
                         ("Bad element " 
                          + std::string((const char *) v_node->name)
                          + " in virtual section"
                             );
             }
-            std::string route = yp2::xml::get_route(ptr);
+            std::string route = mp::xml::get_route(ptr);
             add_map_host2hosts(vhost, targets, route);
             std::list<std::string>::const_iterator it;
             for (it = targets.begin(); it != targets.end(); it++)
@@ -812,7 +1130,7 @@ void yp2::filter::Multi::configure(const xmlNode * ptr)
         }
         else
         {
-            throw yp2::filter::FilterException
+            throw mp::filter::FilterException
                 ("Bad element " 
                  + std::string((const char *) ptr->name)
                  + " in virt_db filter");
@@ -820,13 +1138,13 @@ void yp2::filter::Multi::configure(const xmlNode * ptr)
     }
 }
 
-static yp2::filter::Base* filter_creator()
+static mp::filter::Base* filter_creator()
 {
-    return new yp2::filter::Multi;
+    return new mp::filter::Multi;
 }
 
 extern "C" {
-    struct yp2_filter_struct yp2_filter_multi = {
+    struct metaproxy_1_filter_struct metaproxy_1_filter_multi = {
         0,
         "multi",
         filter_creator