WRBUF updates.
[idzebra-moved-to-github.git] / index / retrieve.c
index d981ec5..2627998 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: retrieve.c,v 1.57 2006-11-23 14:01:21 adam Exp $
-   Copyright (C) 1995-2006
+/* $Id: retrieve.c,v 1.67 2007-03-19 21:50:39 adam Exp $
+   Copyright (C) 1995-2007
    Index Data ApS
 
 This file is part of the Zebra server.
@@ -37,19 +37,19 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <direntz.h>
 
 
-
+#define ZEBRA_XML_HEADER_STR "<record xmlns=\"http://www.indexdata.com/zebra/\"" /* start of
+   the <record> opening tag, left unterminated on purpose: each user appends
+   its own attributes (sysno, set, ...) and the closing ">" or "/>" */
 
 static int zebra_create_record_stream(ZebraHandle zh, 
                                Record *rec,
-                               struct ZebraRecStream *stream){
-
+                               struct ZebraRecStream *stream)
+{
     RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, *rec);
 
-    if ((*rec)->size[recInfo_storeData] > 0){ 
+    if ((*rec)->size[recInfo_storeData] > 0)
         zebra_create_stream_mem(stream, (*rec)->info[recInfo_storeData],
                                 (*rec)->size[recInfo_storeData]);
-    }
-    else {
+    else
+    {
         char full_rep[1024];
         int fd;
             
@@ -65,7 +65,7 @@ static int zebra_create_record_stream(ZebraHandle zh,
             yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
                      full_rep);
             rec_free(rec);
-            return 14;
+            return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
         }
         zebra_create_stream_fd(stream, fd, recordAttr->recordOffset);
     }
@@ -75,8 +75,8 @@ static int zebra_create_record_stream(ZebraHandle zh,
 
 
 static int parse_zebra_elem(const char *elem,
-                             const char **index, size_t *index_len,
-                             const char **type, size_t *type_len)
+                            const char **index, size_t *index_len,
+                            const char **type, size_t *type_len)
 {
     *index = 0;
     *index_len = 0;
@@ -119,26 +119,120 @@ static int parse_zebra_elem(const char *elem,
 }
 
 
+int zebra_special_sort_fetch(ZebraHandle zh, zint sysno, ODR odr,
+                             const char *elemsetname,
+                             oid_value input_format,
+                             oid_value *output_format,
+                             char **rec_bufp, int *rec_lenp)
+{ /* Serve a zebra::index element set request for record sysno from the
+   * sort register, without fetching the record itself.
+   *
+   * elemsetname is the part of the element set name that follows "index";
+   * parse_zebra_elem() splits it into an index name and a type.
+   * input_format must be VAL_TEXT_XML or VAL_SUTRS and is echoed back in
+   * *output_format on success.
+   *
+   * Returns:
+   *   0   *rec_bufp / *rec_lenp describe a buffer obtained from odr_malloc()
+   *       holding the formatted value (copied without a terminating NUL);
+   *   -1  no type was given, or the name is not a sort index; the caller
+   *       can then fall back to another way of producing the record;
+   *   a YAZ_BIB1_* diagnostic for an unsupported format (in which case
+   *       *output_format is set to VAL_NONE) or an unusable name.
+   */
+    const char *retrieval_index;
+    size_t retrieval_index_len; 
+    const char *retrieval_type;
+    size_t retrieval_type_len;
+    char retrieval_index_cstr[256];
+    int ord; /* result of the sort index lookup; -1 means "no such index" */
+
+    /* only accept XML and SUTRS requests */
+    if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
+    {
+        yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", 
+                elemsetname);
+        *output_format = VAL_NONE;
+        return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
+    }
+    /* split the name into (index, index_len) and (type, type_len) */
+    if (!parse_zebra_elem(elemsetname,
+                          &retrieval_index, &retrieval_index_len,
+                          &retrieval_type,  &retrieval_type_len))
+    {
+        return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+    }
+    /* a type is required; the index name must be non-empty and fit below */
+    if (retrieval_type_len == 0)
+        return -1;   /* must have a register type specified */
+    if (!retrieval_index_len ||
+        retrieval_index_len >= sizeof(retrieval_index_cstr)-1)
+    {
+        return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+    }
+        /* index name is length-delimited: make a NUL-terminated copy */
+    memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
+    retrieval_index_cstr[retrieval_index_len] = '\0';
+
+    ord = zebraExplain_lookup_attr_str(zh->reg->zei,
+                                       zinfo_index_category_sort,
+                                       retrieval_type[0],
+                                       retrieval_index_cstr);
+    if (ord == -1)
+        return -1;  /* is not a sort index */
+    else
+    {
+        char dst_buf[IT_MAX_WORD];
+        char str[IT_MAX_WORD];
+        int index_type;
+        const char *db = 0;
+        const char *string_index = 0;
+        WRBUF wrbuf = wrbuf_alloc();
+        /* presumably: select record and index in the sort register, read value */
+        zebra_sort_sysno(zh->reg->sort_index, sysno);
+        zebra_sort_type(zh->reg->sort_index, ord);
+        zebra_sort_read(zh->reg->sort_index, str);
+
+        zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, &string_index);
+        /* presumably maps the stored term back to a displayable form */
+        zebra_term_untrans(zh, index_type, dst_buf, str);
+        /* render the value in the requested syntax */
+
+        if (input_format == VAL_TEXT_XML)
+        {
+            *output_format = VAL_TEXT_XML;
+            wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
+                         " sysno=\"" ZINT_FORMAT "\""
+                         " set=\"zebra::index%s/\">\n",
+                         sysno, elemsetname);
+
+            wrbuf_printf(wrbuf, "  <index name=\"%s\"", 
+                         string_index);
+            wrbuf_printf(wrbuf, " type=\"%c\">", index_type);
+            wrbuf_xmlputs(wrbuf, dst_buf);
+            wrbuf_printf(wrbuf, "</index>\n");
+            wrbuf_printf(wrbuf, "</record>\n");
+        }
+        else if (input_format == VAL_SUTRS)
+        {
+            *output_format = VAL_SUTRS;
+            /* one line: index name, type character, value */
+            wrbuf_printf(wrbuf, "%s %c %s\n", string_index, index_type,
+                         dst_buf);
+        }
+        *rec_lenp = wrbuf_len(wrbuf); /* hand the result over in ODR memory */
+        *rec_bufp = odr_malloc(odr, *rec_lenp);
+        memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
+        wrbuf_destroy(wrbuf);
+        return 0;
+    }
+}
+                            
 int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
-                          Record rec,
-                          const char *elemsetname,
-                          oid_value input_format,
-                          oid_value *output_format,
-                          char **rec_bufp, int *rec_lenp)
+                              Record rec,
+                              const char *elemsetname,
+                              oid_value input_format,
+                              oid_value *output_format,
+                              char **rec_bufp, int *rec_lenp)
 {
     const char *retrieval_index;
     size_t retrieval_index_len; 
     const char *retrieval_type;
     size_t retrieval_type_len;
-    WRBUF wrbuf = 0;
     zebra_rec_keys_t keys;
+    int ret_code = 0;
     
     /* set output variables before processing possible error states */
     /* *rec_lenp = 0; */
 
     /* only accept XML and SUTRS requests */
-    if (input_format != VAL_TEXT_XML
-        && input_format != VAL_SUTRS){
+    if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
+    {
         yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", 
                 elemsetname);
         *output_format = VAL_NONE;
@@ -159,7 +253,7 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
     {
         char retrieval_index_cstr[256];
 
-        if (retrieval_index_len  < sizeof(retrieval_index_cstr) -1)
+        if (retrieval_index_len < sizeof(retrieval_index_cstr) -1)
         {
             memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
             retrieval_index_cstr[retrieval_index_len] = '\0';
@@ -177,20 +271,22 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
     zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
                            rec->size[recInfo_delKeys], 0);
 
-    wrbuf = wrbuf_alloc();
-    if (zebra_rec_keys_rewind(keys)){
+    if (!zebra_rec_keys_rewind(keys))
+    {
+        ret_code = 
+            YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
+    }
+    else
+    {
         size_t slen;
         const char *str;
         struct it_key key_in;
-
-        if (input_format == VAL_TEXT_XML){
+        WRBUF wrbuf = wrbuf_alloc();
+    
+        if (input_format == VAL_TEXT_XML)
+        {
             *output_format = VAL_TEXT_XML;
-            /*wrbuf_printf(wrbuf, 
-              "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");*/
-            
-            wrbuf_printf(wrbuf, 
-                         "<record xmlns="
-                         "\"http://www.indexdata.com/zebra/\""
+            wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
                          " sysno=\"" ZINT_FORMAT "\""
                          " set=\"zebra::index%s/\">\n",
                          sysno, elemsetname);
@@ -198,8 +294,8 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
         else if (input_format == VAL_SUTRS)
             *output_format = VAL_SUTRS;
 
-
-        while(zebra_rec_keys_read(keys, &str, &slen, &key_in)){
+        while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
+        {
             int i;
             int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
             int index_type;
@@ -217,17 +313,16 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
             if (retrieval_index == 0 
                 || (string_index_len == retrieval_index_len 
                     && !memcmp(string_index, retrieval_index,
-                               string_index_len))){
-               
+                               string_index_len)))
+            {
                 /* process only if type is not defined, or is matching */
                 if (retrieval_type == 0 
                     || (retrieval_type_len == 1 
-                        && retrieval_type[0] == index_type)){
-                    
-
+                        && retrieval_type[0] == index_type))
+                {
                     zebra_term_untrans(zh, index_type, dst_buf, str);
-                    if (strlen(dst_buf)){
-
+                    if (strlen(dst_buf))
+                    {
                         if (input_format == VAL_TEXT_XML){
                             wrbuf_printf(wrbuf, "  <index name=\"%s\"", 
                                          string_index);
@@ -261,16 +356,45 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
         }
         if (input_format == VAL_TEXT_XML)
             wrbuf_printf(wrbuf, "</record>\n");
-     }
-    *rec_lenp = wrbuf_len(wrbuf);
-    *rec_bufp = odr_malloc(odr, *rec_lenp);
-    memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
-    wrbuf_free(wrbuf, 1);
+        *rec_lenp = wrbuf_len(wrbuf);
+        *rec_bufp = odr_malloc(odr, *rec_lenp);
+        memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
+        wrbuf_destroy(wrbuf);
+    }
     zebra_rec_keys_close(keys);
-    return 0;
+    return ret_code;
+}
+
+
+static void retrieve_puts_attr(WRBUF wrbuf, const char *name,
+                               const char *value)
+{ /* Append an XML attribute of the form  name="value"  (with a leading
+   * space) to wrbuf. value is written through wrbuf_xmlputs(), presumably
+   * so that it is XML-escaped; name is written as given. Nothing at all is
+   * appended when value is NULL, so optional fields simply disappear. */
+    if (value)
+    {
+        wrbuf_printf(wrbuf, " %s=\"", name);
+        wrbuf_xmlputs(wrbuf, value);
+        wrbuf_printf(wrbuf, "\"");
+    }
 }
 
+static void retrieve_puts_attr_int(WRBUF wrbuf, const char *name,
+                               const int value)
+{ /* Append an XML attribute  name="<value>"  (with a leading space) to
+   * wrbuf, formatting value as a decimal integer. Always writes. */
+    wrbuf_printf(wrbuf, " %s=\"%i\"", name, value);
+}
 
+static void retrieve_puts_str(WRBUF wrbuf, const char *name,
+                               const char *value)
+{ /* Append one plain-text line "name value\n" to wrbuf. value is written
+   * verbatim; nothing is appended when value is NULL. */
+    if (value)
+        wrbuf_printf(wrbuf, "%s %s\n", name, value);
+}
+
+static void retrieve_puts_int(WRBUF wrbuf, const char *name,
+                               const int value)
+{ /* Append one plain-text line "name <value>\n" to wrbuf, formatting
+   * value as a decimal integer. Always writes. */
+    wrbuf_printf(wrbuf, "%s %i\n", name, value);
+}
 
 int zebra_special_fetch(ZebraHandle zh, zint sysno, int score, ODR odr,
                            const char *elemsetname,
@@ -283,112 +407,138 @@ int zebra_special_fetch(ZebraHandle zh, zint sysno, int score, ODR odr,
     /* set output variables before processing possible error states */
     /* *rec_lenp = 0; */
 
-    /* only accept XML and SUTRS requests */
-    if (input_format != VAL_TEXT_XML
-        && input_format != VAL_SUTRS){
-        yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", 
-                elemsetname);
-        return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
-    }
-    
+
+
     /* processing zebra::meta::sysno elemset without fetching binary data */
-    if (elemsetname  && 0 == strcmp(elemsetname, "meta::sysno"))
+    if (elemsetname && 0 == strcmp(elemsetname, "meta::sysno"))
     {
-       char rec_str[128];
-        if (input_format == VAL_SUTRS){
-            sprintf(rec_str, ZINT_FORMAT, sysno);
+        int ret = 0;
+        WRBUF wrbuf = wrbuf_alloc();
+        if (input_format == VAL_SUTRS)
+        {
+            wrbuf_printf(wrbuf, ZINT_FORMAT, sysno);
             *output_format = VAL_SUTRS;
         } 
-        else if (input_format == VAL_TEXT_XML){
-            sprintf(rec_str, "<record xmlns="
-                    "\"http://www.indexdata.com/zebra/\""
-                             " sysno=\"" ZINT_FORMAT "\""
-                             " set=\"zebra::%s\"/>\n",
-                    sysno, elemsetname);
+        else if (input_format == VAL_TEXT_XML)
+        {
+            wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
+                         " sysno=\"" ZINT_FORMAT "\"/>\n",
+                         sysno);
             *output_format = VAL_TEXT_XML;
         }
-       *rec_lenp = strlen(rec_str);
-        if (*rec_lenp){
-            *rec_bufp = odr_strdup(odr, rec_str);
-            return 0;
-        } else {
-            return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
-        }
+       *rec_lenp = wrbuf_len(wrbuf);
+        if (*rec_lenp)
+            *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf));
+        else
+            ret = YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
+        wrbuf_destroy(wrbuf);
+        return ret;
+    }
+
+    /* processing special elementsetname zebra::index:: for sort elements */
+    if (elemsetname && 0 == strncmp(elemsetname, "index", 5))
+    {
+        int ret = zebra_special_sort_fetch(zh, sysno, odr,
+                                           elemsetname + 5,
+                                           input_format, output_format,
+                                           rec_bufp, rec_lenp);
+        if (ret != -1)
+            return ret;
+        /* not a sort index so we continue to get the full record */
     }
 
+
     /* fetching binary record up for all other display elementsets */
     rec = rec_get(zh->reg->records, sysno);
-    if (!rec){
+    if (!rec)
+    {
         yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
         return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
     }
 
+    /* processing special elementsetnames zebra::data */    
+    if (elemsetname && 0 == strcmp(elemsetname, "data"))
+    {
+        struct ZebraRecStream stream;
+        RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec); 
+        zebra_create_record_stream(zh, &rec, &stream);
+        *output_format = input_format;
+        *rec_lenp = recordAttr->recordSize;
+        *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
+        stream.readf(&stream, *rec_bufp, *rec_lenp);
+        stream.destroy(&stream);
+        rec_free(&rec);
+        return 0;
+    }
+
+    /* only accept XML and SUTRS requests from now on */
+    if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
+    {
+        yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", 
+                elemsetname);
+        return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
+    }
+    
+
     /* processing special elementsetnames zebra::meta:: */
-    if (elemsetname && 0 == strcmp(elemsetname, "meta")){
+    if (elemsetname && 0 == strcmp(elemsetname, "meta"))
+    {
         int ret = 0;
-        char rec_str[1024];
+        WRBUF wrbuf = wrbuf_alloc();
         RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec); 
 
-        if (input_format == VAL_TEXT_XML){
+        if (input_format == VAL_TEXT_XML)
+        {
             *output_format = VAL_TEXT_XML;
-
-             sprintf(rec_str, 
-                     "<record xmlns="
-                     "\"http://www.indexdata.com/zebra/\""
-                     " sysno=\"" ZINT_FORMAT "\""
-                     " base=\"%.*s\""
-                     " file=\"%.*s\""
-                     " type=\"%.*s\""
-                     " score=\"%i\""
-                     " rank=\"" ZINT_FORMAT "\""
-                     " size=\"%i\""
-                     " set=\"zebra::%s/\">\n",
-                     sysno, 
-                     rec->size[recInfo_databaseName], rec->info[recInfo_databaseName],
-                     rec->size[recInfo_filename], rec->info[recInfo_filename],
-                     rec->size[recInfo_fileType], rec->info[recInfo_fileType],
-                     score,
-                     recordAttr->staticrank,
-                     recordAttr->recordSize,
-                     elemsetname);
+            
+            wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
+                         " sysno=\"" ZINT_FORMAT "\"", sysno);
+            retrieve_puts_attr(wrbuf, "base", rec->info[recInfo_databaseName]);
+            retrieve_puts_attr(wrbuf, "file", rec->info[recInfo_filename]);
+            retrieve_puts_attr(wrbuf, "type", rec->info[recInfo_fileType]);
+            if (score >= 0)
+                retrieve_puts_attr_int(wrbuf, "score", score);
+           
+            wrbuf_printf(wrbuf,
+                         " rank=\"" ZINT_FORMAT "\""
+                         " size=\"%i\""
+                         " set=\"zebra::%s\"/>\n",
+                         recordAttr->staticrank,
+                         recordAttr->recordSize,
+                         elemsetname);
         }
-        else if (input_format == VAL_SUTRS){
+        else if (input_format == VAL_SUTRS)
+        {
             *output_format = VAL_SUTRS;
-             sprintf(rec_str, 
-                     "sysno " ZINT_FORMAT "\n"
-                     "base %.*s\n"
-                     "file %.*s\n"
-                     "type %.*s\n"
-                     "score %i\n"
-                     "rank " ZINT_FORMAT "\n"
-                     "size %i\n"
-                     "set zebra::%s\n",
-                     sysno, 
-                     rec->size[recInfo_databaseName], rec->info[recInfo_databaseName],
-                     rec->size[recInfo_filename], rec->info[recInfo_filename],
-                     rec->size[recInfo_fileType], rec->info[recInfo_fileType],
-                     score,
-                     recordAttr->staticrank,
-                     recordAttr->recordSize,
-                     elemsetname);
+            wrbuf_printf(wrbuf, "sysno " ZINT_FORMAT "\n", sysno);
+            retrieve_puts_str(wrbuf, "base", rec->info[recInfo_databaseName]);
+            retrieve_puts_str(wrbuf, "file", rec->info[recInfo_filename]);
+            retrieve_puts_str(wrbuf, "type", rec->info[recInfo_fileType]);
+            if (score >= 0)
+                retrieve_puts_int(wrbuf, "score", score);
+
+            wrbuf_printf(wrbuf,
+                         "rank " ZINT_FORMAT "\n"
+                         "size %i\n"
+                         "set zebra::%s\n",
+                         recordAttr->staticrank,
+                         recordAttr->recordSize,
+                         elemsetname);
         }
-        
-        
-       *rec_lenp = strlen(rec_str);
-        if (*rec_lenp){
-            *rec_bufp = odr_strdup(odr, rec_str);
-            ret = 0;
-        } else {
+       *rec_lenp = wrbuf_len(wrbuf);
+        if (*rec_lenp)
+            *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf));
+        else
             ret = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
-        }
 
+        wrbuf_destroy(wrbuf);
         rec_free(&rec);
         return ret;
     }
 
     /* processing special elementsetnames zebra::index:: */
-    if (elemsetname && 0 == strncmp(elemsetname, "index", 5)){
-        
+    if (elemsetname && 0 == strncmp(elemsetname, "index", 5))
+    {
         int ret = zebra_special_index_fetch(zh, sysno, odr, rec,
                                             elemsetname + 5,
                                             input_format, output_format,
@@ -398,20 +548,6 @@ int zebra_special_fetch(ZebraHandle zh, zint sysno, int score, ODR odr,
         return ret;
     }
 
-    /* processing special elementsetnames zebra::data */    
-    if (elemsetname && 0 == strcmp(elemsetname, "data")){
-        struct ZebraRecStream stream;
-        RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec); 
-        zebra_create_record_stream(zh, &rec, &stream);
-        *output_format = input_format;
-        *rec_lenp = recordAttr->recordSize;
-        *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
-        stream.readf(&stream, *rec_bufp, *rec_lenp);
-        stream.destroy(&stream);
-        rec_free(&rec);
-        return 0;
-    }
-
     if (rec)
         rec_free(&rec);
     return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
@@ -466,10 +602,10 @@ int zebra_record_fetch(ZebraHandle zh, zint sysno, int score,
     yaz_log(YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d",
             sysno, score);
 
-    zebra_create_record_stream(zh, &rec, &stream);
-    
+    return_code = zebra_create_record_stream(zh, &rec, &stream);
+
+    if (rec)
     {
-       /* snippets code */
        zebra_snippets *snippet;
        zebra_rec_keys_t reckeys = zebra_rec_keys_open();
         RecType rt;
@@ -520,6 +656,12 @@ int zebra_record_fetch(ZebraHandle zh, zint sysno, int score,
         if (!(rt = recType_byName(zh->reg->recTypes, zh->res,
                                   file_type, &clientData)))
         {
+            char addinfo_str[100];
+
+            sprintf(addinfo_str, "Could not handle record type %.40s",
+                    file_type);
+                    
+            *addinfo = odr_strdup(odr, addinfo_str);
             return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
         }
         else
@@ -535,10 +677,10 @@ int zebra_record_fetch(ZebraHandle zh, zint sysno, int score,
 
        zebra_snippets_destroy(snippet);
         zebra_snippets_destroy(retrieveCtrl.doc_snippet);
-     }
 
-    stream.destroy(&stream);
-    rec_free(&rec);
+        stream.destroy(&stream);
+        rec_free(&rec);
+    }
 
     return return_code;
 }