Adding open-url link generation to the normalization stylesheets.
[pazpar2-moved-to-github.git] / src / logic.c
index e131fb3..4146612 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: logic.c,v 1.47 2007-06-28 09:36:10 adam Exp $
+/* $Id: logic.c,v 1.61 2007-08-16 11:30:45 adam Exp $
    Copyright (c) 2006-2007, Index Data.
 
 This file is part of Pazpar2.
@@ -46,6 +46,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include <yaz/query-charset.h>
 #include <yaz/querytowrbuf.h>
 #include <yaz/oid_db.h>
+#include <yaz/snprintf.h>
 
 #if HAVE_CONFIG_H
 #include "cconfig.h"
@@ -92,10 +93,11 @@ struct parameters global_parameters =
     100,
     MAX_CHUNK,
     0,
-    0
+    0,
+    180,
+    30
 };
 
-
 // Recursively traverse query structure to extract terms.
 void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num)
 {
@@ -170,12 +172,46 @@ xmlDoc *record_to_xml(struct session_database *sdb, Z_External *rec)
             return 0;
         }
     }
+    else if (rec->which == Z_External_OPAC)
+    {
+        if (!sdb->yaz_marc)
+        {
+            yaz_log(YLOG_WARN, "MARC decoding not configured");
+            return 0;
+        }
+        else
+        {
+            /* OPAC gets converted to XML too */
+            WRBUF wrbuf_opac = wrbuf_alloc();
+            /* MARCXML inside the OPAC XML. Charset is in effect because we
+               use the yaz_marc handle */
+            yaz_marc_xml(sdb->yaz_marc, YAZ_MARC_MARCXML);
+            yaz_opac_decode_wrbuf(sdb->yaz_marc, rec->u.opac, wrbuf_opac);
+            
+            rdoc = xmlParseMemory((char*) wrbuf_buf(wrbuf_opac),
+                                  wrbuf_len(wrbuf_opac));
+            if (!rdoc)
+            {
+                yaz_log(YLOG_WARN, "Unable to parse OPAC XML");
+                /* Was used to debug bug #1348 */
+#if 0
+                FILE *f = fopen("/tmp/opac.xml.txt", "wb");
+                if (f)
+                {
+                    fwrite(wrbuf_buf(wrbuf_opac), 1, wrbuf_len(wrbuf_opac), f);
+                    fclose(f);
+                }
+#endif
+            }
+            wrbuf_destroy(wrbuf_opac);
+        }
+    }
     else if (oid && yaz_oid_is_iso2709(oid))
     {
         /* ISO2709 gets converted to MARCXML */
         if (!sdb->yaz_marc)
         {
-            yaz_log(YLOG_FATAL, "Unable to handle ISO2709 record");
+            yaz_log(YLOG_WARN, "MARC decoding not configured");
             return 0;
         }
         else
@@ -198,7 +234,6 @@ xmlDoc *record_to_xml(struct session_database *sdb, Z_External *rec)
                 return 0;
             }
             
-            yaz_marc_write_using_libxml2(sdb->yaz_marc, 1);
             if (yaz_marc_write_xml(sdb->yaz_marc, &res,
                                    "http://www.loc.gov/MARC21/slim", 0, 0) < 0)
             {
@@ -220,31 +255,105 @@ xmlDoc *record_to_xml(struct session_database *sdb, Z_External *rec)
         return 0;
     }
     
-    if (global_parameters.dump_records){
-        fprintf(stderr, 
-                "Input Record (normalized) from %s\n----------------\n",
-                db->url);
+    if (global_parameters.dump_records)
+    {
+        FILE *lf = yaz_log_file();
+        if (lf)
+        {
+            yaz_log(YLOG_LOG, "Un-normalized record from %s", db->url);
 #if LIBXML_VERSION >= 20600
-        xmlDocFormatDump(stderr, rdoc, 1);
+            xmlDocFormatDump(lf, rdoc, 1);
 #else
-        xmlDocDump(stderr, rdoc);
+            xmlDocDump(lf, rdoc);
 #endif
+            fprintf(lf, "\n");
+        }
     }
     return rdoc;
 }
 
-xmlDoc *normalize_record(struct session_database *sdb, Z_External *rec)
+#define MAX_XSLT_ARGS 16
+
+// Fill parms[] with (name, quoted value) pairs from session database settings, terminated by 0
+static void insert_settings_parameters(struct session_database *sdb,
+        struct session *se, char **parms)
+{
+    struct conf_service *service = global_parameters.server->service;
+    int i;
+    int nparms = 0; // number of name/value pairs stored so far
+    int offset = 0; // next free slot in parms[]; advances by two per pair
+
+    for (i = 0; i < service->num_metadata; i++)
+    {
+        struct conf_metadata *md = &service->metadata[i];
+        int setting;
+
+        if (md->setting == Metadata_setting_parameter &&
+                (setting = settings_offset(md->name)) > 0) // only metadata flagged as parameter with a positive settings offset
+        {
+            char *val = session_setting_oneval(sdb, setting);
+            if (val && nparms < MAX_XSLT_ARGS) // pairs beyond MAX_XSLT_ARGS are silently skipped
+            {
+                char *buf;
+                int len = strlen(val);
+                buf = nmem_malloc(se->nmem, len + 3); // room for val, two quote characters and the NUL
+                buf[0] = '\''; // NOTE(review): val is copied verbatim between quotes; a ' inside val is not escaped - confirm values cannot contain one
+                strcpy(buf + 1, val);
+                buf[len+1] = '\'';
+                buf[len+2] = '\0';
+                parms[offset++] = md->name;
+                parms[offset++] = buf;
+                nparms++;
+            }
+        }
+    }
+    parms[offset] = 0; // terminating 0; at most 2*MAX_XSLT_ARGS+1 slots are written in total
+}
+
+// Append session database setting values to the document root as <metadata type="NAME"> text elements
+static void insert_settings_values(struct session_database *sdb, xmlDoc *doc)
+{
+    struct conf_service *service = global_parameters.server->service;
+    int i;
+
+    for (i = 0; i < service->num_metadata; i++)
+    {
+        struct conf_metadata *md = &service->metadata[i];
+        int offset;
+
+        if (md->setting == Metadata_setting_postproc &&
+                (offset = settings_offset(md->name)) > 0) // only metadata flagged as postproc with a positive settings offset
+        {
+            char *val = session_setting_oneval(sdb, offset);
+            if (val)
+            {
+                xmlNode *r = xmlDocGetRootElement(doc); // NOTE(review): result is used without a NULL check - confirm doc always has a root here
+                xmlNode *n = xmlNewTextChild(r, 0, (xmlChar *) "metadata",
+                        (xmlChar *) val);
+                xmlSetProp(n, (xmlChar *) "type", (xmlChar *) md->name); // type attribute carries the metadata name
+            }
+        }
+    }
+}
+
+xmlDoc *normalize_record(struct session_database *sdb, struct session *se,
+        Z_External *rec)
 {
     struct database_retrievalmap *m;
     xmlDoc *rdoc = record_to_xml(sdb, rec);
     if (rdoc)
     {
-        for (m = sdb->map; m; m = m->next){
+        for (m = sdb->map; m; m = m->next)
+        {
             xmlDoc *new = 0;
             
             {
                 xmlNodePtr root = 0;
-                new = xsltApplyStylesheet(m->stylesheet, rdoc, 0);
+                char *parms[MAX_XSLT_ARGS*2+1];
+
+                insert_settings_parameters(sdb, se, parms);
+
+                new = xsltApplyStylesheet(m->stylesheet, rdoc, (const char **) parms);
                 root= xmlDocGetRootElement(new);
                 if (!new || !root || !(root->children))
                 {
@@ -259,15 +368,24 @@ xmlDoc *normalize_record(struct session_database *sdb, Z_External *rec)
             xmlFreeDoc(rdoc);
             rdoc = new;
         }
+
+        insert_settings_values(sdb, rdoc);
+
         if (global_parameters.dump_records)
         {
-            fprintf(stderr, "Record from %s\n----------------\n", 
-                    sdb->database->url);
+            FILE *lf = yaz_log_file();
+            
+            if (lf)
+            {
+                yaz_log(YLOG_LOG, "Normalized record from %s", 
+                        sdb->database->url);
 #if LIBXML_VERSION >= 20600
-            xmlDocFormatDump(stderr, rdoc, 1);
+                xmlDocFormatDump(lf, rdoc, 1);
 #else
-            xmlDocDump(stderr, rdoc);
+                xmlDocDump(lf, rdoc);
 #endif
+                fprintf(lf, "\n");
+            }
         }
     }
     return rdoc;
@@ -339,7 +457,30 @@ static int prepare_map(struct session *se, struct session_database *sdb)
         char **stylesheets;
         struct database_retrievalmap **m = &sdb->map;
         int num, i;
+        char auto_stylesheet[256];
 
+        if (!strcmp(s, "auto"))
+        {
+            char *request_syntax = session_setting_oneval(sdb,
+                                                          PZ_REQUESTSYNTAX);
+            if (request_syntax)
+            {
+                char *cp;
+                yaz_snprintf(auto_stylesheet, sizeof(auto_stylesheet),
+                             "%s.xsl", request_syntax);
+                for (cp = auto_stylesheet; *cp; cp++)
+                {
+                    /* deliberately only consider ASCII */
+                    if (*cp > 32 && *cp < 127)
+                        *cp = tolower(*cp);
+                }
+                s = auto_stylesheet;
+            }
+            else
+            {
+                yaz_log(YLOG_WARN, "No pz:requestsyntax for auto stylesheet");
+            }
+        }
         nmem_strsplit(se->session_nmem, ",", s, &stylesheets, &num);
         for (i = 0; i < num; i++)
         {
@@ -497,6 +638,8 @@ enum pazpar2_error_code search(struct session *se,
                                const char **addinfo)
 {
     int live_channels = 0;
+    int no_working = 0;
+    int no_failed = 0;
     struct client *cl;
     struct database_criterion *criteria;
 
@@ -504,23 +647,22 @@ enum pazpar2_error_code search(struct session *se,
 
     *addinfo = 0;
     nmem_reset(se->nmem);
+    se->relevance = 0;
+    se->total_records = se->total_hits = se->total_merged = 0;
+    se->reclist = 0;
+    se->num_termlists = 0;
     criteria = parse_filter(se->nmem, filter);
     se->requestid++;
     live_channels = select_targets(se, criteria);
     if (live_channels)
     {
         int maxrecs = live_channels * global_parameters.toget;
-        se->num_termlists = 0;
         se->reclist = reclist_create(se->nmem, maxrecs);
-        // This will be initialized in send_search()
-        se->total_records = se->total_hits = se->total_merged = 0;
         se->expected_maxrecs = maxrecs;
     }
     else
         return PAZPAR2_NO_TARGETS;
 
-    se->relevance = 0;
-
     for (cl = se->clients; cl; cl = client_next_in_session(cl))
     {
         if (prepare_session_database(se, client_get_database(cl)) < 0)
@@ -528,17 +670,22 @@ enum pazpar2_error_code search(struct session *se,
             *addinfo = client_get_database(cl)->database->url;
             return PAZPAR2_CONFIG_TARGET;
         }
-        // Query must parse for all targets
+        // Parse query for target
         if (client_parse_query(cl, query) < 0)
+            no_failed++;
+        else
         {
-            *addinfo = "query";
-            return PAZPAR2_MALFORMED_PARAMETER_VALUE;
+            no_working++;
+            client_prep_connection(cl);
         }
     }
 
-    for (cl = se->clients; cl; cl = client_next_in_session(cl))
-        client_prep_connection(cl);
-
+    // If no queries could be mapped, we signal an error
+    if (no_working == 0)
+    {
+        *addinfo = "query";
+        return PAZPAR2_MALFORMED_PARAMETER_VALUE;
+    }
     return PAZPAR2_NO_ERROR;
 }
 
@@ -757,13 +904,13 @@ void report_nmem_stats(void)
 }
 #endif
 
-struct record_cluster *show_single(struct session *s, int id)
+struct record_cluster *show_single(struct session *s, const char *id)
 {
     struct record_cluster *r;
 
     reclist_rewind(s->reclist);
     while ((r = reclist_read_record(s->reclist)))
-        if (r->recid == id)
+        if (!strcmp(r->recid, id))
             return r;
     return 0;
 }
@@ -915,10 +1062,35 @@ void pazpar2_event_loop()
     event_loop(&channel_list);
 }
 
+static struct record_metadata *record_metadata_init( // create a record_metadata from a text value; returns 0 on failure
+    NMEM nmem, char *value, enum conf_metadata_type type)
+{
+    struct record_metadata *rec_md = record_metadata_create(nmem);
+    if (type == Metadata_type_generic)
+    {
+        char * p = value;
+        p = normalize7bit_generic(p, " ,/.:([");
+        // keep a copy of the normalized text, allocated from nmem
+        rec_md->data.text = nmem_strdup(nmem, p);
+    }
+    else if (type == Metadata_type_year)
+    {
+        int first, last;
+        if (extract7bit_years((char *) value, &first, &last) < 0)
+            return 0; // year extraction reported failure
+        rec_md->data.number.min = first;
+        rec_md->data.number.max = last;
+    }
+    else
+        return 0; // any other metadata type is not handled here
+    return rec_md;
+}
+
 struct record *ingest_record(struct client *cl, Z_External *rec,
                              int record_no)
 {
-    xmlDoc *xdoc = normalize_record(client_get_database(cl), rec);
+    xmlDoc *xdoc = normalize_record(client_get_database(cl),
+        client_get_session(cl), rec);
     xmlNode *root, *n;
     struct record *record;
     struct record_cluster *cluster;
@@ -952,7 +1124,7 @@ struct record *ingest_record(struct client *cl, Z_External *rec,
                              record, (char *) mergekey_norm, 
                              &se->total_merged);
     if (global_parameters.dump_records)
-        yaz_log(YLOG_LOG, "Cluster id %d from %s (#%d)", cluster->recid,
+        yaz_log(YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid,
                 client_get_database(cl)->database->url, record_no);
     if (!cluster)
     {
@@ -982,12 +1154,11 @@ struct record *ingest_record(struct client *cl, Z_External *rec,
             struct record_metadata *rec_md = 0;
             int md_field_id = -1;
             int sk_field_id = -1;
-            int first, last;
 
             type = xmlGetProp(n, (xmlChar *) "type");
             value = xmlNodeListGetString(xdoc, n->children, 1);
 
-            if (!type || !value)
+            if (!type || !value || !*value)
                 continue;
 
             md_field_id 
@@ -1006,37 +1177,22 @@ struct record *ingest_record(struct client *cl, Z_External *rec,
                 ser_sk = &service->sortkeys[sk_field_id];
             }
 
-            // Find out where we are putting it - based on merge or not
-            if (ser_md->merge == Metadata_merge_no)
-                wheretoput = &record->metadata[md_field_id];
-            else
-                wheretoput = &cluster->metadata[md_field_id];
-            
-            // create new record_metadata
-            rec_md = record_metadata_create(se->nmem);
-
-            // and polulate with data:
-            // type based charmapping decisions follow here
-            if (ser_md->type == Metadata_type_generic)
-            {
-
-                char * p = (char *) value;
-                p = normalize7bit_generic(p, " ,/.:([");
-                
-                rec_md->data.text = nmem_strdup(se->nmem, p);
-
-            }
-            else if (ser_md->type == Metadata_type_year)
-            {
-                if (extract7bit_years((char *) value, &first, &last) < 0)
-                    continue;
-            }
-            else
+            // non-merged metadata
+            rec_md = record_metadata_init(se->nmem, (char *) value,
+                                          ser_md->type);
+            if (!rec_md)
             {
-                yaz_log(YLOG_WARN, 
-                        "Unknown type in metadata element %s", type);
+                yaz_log(YLOG_WARN, "bad metadata data '%s' for element '%s'",
+                        value, type);
                 continue;
             }
+            rec_md->next = record->metadata[md_field_id];
+            record->metadata[md_field_id] = rec_md;
+
+            // merged metadata
+            rec_md = record_metadata_init(se->nmem, (char *) value,
+                                          ser_md->type);
+            wheretoput = &cluster->metadata[md_field_id];
 
             // and polulate with data:
             // assign cluster or record based on merge action
@@ -1073,8 +1229,7 @@ struct record *ingest_record(struct client *cl, Z_External *rec,
                     }
                 }
             }
-            else if (ser_md->merge == Metadata_merge_all 
-                     || ser_md->merge == Metadata_merge_no)
+            else if (ser_md->merge == Metadata_merge_all)
             {
                 rec_md->next = *wheretoput;
                 *wheretoput = rec_md;
@@ -1084,18 +1239,18 @@ struct record *ingest_record(struct client *cl, Z_External *rec,
                 if (!*wheretoput)
                 {
                     *wheretoput = rec_md;
-                    (*wheretoput)->data.number.min = first;
-                    (*wheretoput)->data.number.max = last;
                     if (ser_sk)
                         cluster->sortkeys[sk_field_id] 
                             = &rec_md->data;
                 }
                 else
                 {
-                    if (first < (*wheretoput)->data.number.min)
-                        (*wheretoput)->data.number.min = first;
-                    if (last > (*wheretoput)->data.number.max)
-                        (*wheretoput)->data.number.max = last;
+                    int this_min = rec_md->data.number.min;
+                    int this_max = rec_md->data.number.max;
+                    if (this_min < (*wheretoput)->data.number.min)
+                        (*wheretoput)->data.number.min = this_min;
+                    if (this_max > (*wheretoput)->data.number.max)
+                        (*wheretoput)->data.number.max = this_max;
                 }
 #ifdef GAGA
                 if (ser_sk)
@@ -1120,11 +1275,11 @@ struct record *ingest_record(struct client *cl, Z_External *rec,
                 if (ser_md->type == Metadata_type_year)
                 {
                     char year[64];
-                    sprintf(year, "%d", last);
+                    sprintf(year, "%d", rec_md->data.number.max);
                     add_facet(se, (char *) type, year);
-                    if (first != last)
+                    if (rec_md->data.number.max != rec_md->data.number.min)
                     {
-                        sprintf(year, "%d", first);
+                        sprintf(year, "%d", rec_md->data.number.min);
                         add_facet(se, (char *) type, year);
                     }
                 }