Omit CVS Id. Update copyright year.
[idzebra-moved-to-github.git] / index / safari.c
index 056b4ce..be8c567 100644 (file)
@@ -1,8 +1,5 @@
-/* $Id: safari.c,v 1.2 2006-08-14 10:40:15 adam Exp $
-   Copyright (C) 1995-2006
-   Index Data ApS
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+   Copyright (C) 1995-2008 Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -20,7 +17,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 
-
+#include <yaz/oid_db.h>
 #include <stdio.h>
 #include <assert.h>
 #include <ctype.h>
@@ -29,13 +26,20 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <idzebra/recctrl.h>
 
 struct filter_info {
-    char *sep;
+    int segments;
 };
 
-static void *filter_init (Res res, RecType recType)
+static void *filter_init(Res res, RecType recType)
+{
+    struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
+    tinfo->segments = 0;
+    return tinfo;
+}
+
+static void *filter_init2(Res res, RecType recType)
 {
     struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
-    tinfo->sep = 0;
+    tinfo->segments = 1;
     return tinfo;
 }
 
@@ -47,7 +51,6 @@ static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
 static void filter_destroy(void *clientData)
 {
     struct filter_info *tinfo = clientData;
-    xfree (tinfo->sep);
     xfree (tinfo);
 }
 
@@ -75,7 +78,7 @@ static int fi_getchar(struct fi_info *fi, char *dst)
     {
         if (fi->max <= 0)
             return 0;
-        fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096);
+        fi->max = fi->p->stream->readf(fi->p->stream, fi->buf, 4096);
         fi->offset = 0;
         if (fi->max <= 0)
             return 0;
@@ -112,45 +115,72 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p)
     struct filter_info *tinfo = clientData;
     char line[512];
     RecWord recWord;
+    int ret = RECCTRL_EXTRACT_OK;
     struct fi_info *fi = fi_open(p);
 
 #if 0
     yaz_log(YLOG_LOG, "filter_extract off=%ld",
            (long) (*fi->p->tellf)(fi->p->fh));
 #endif
-    xfree(tinfo->sep);
-    tinfo->sep = 0;
     (*p->init)(p, &recWord);
 
     if (!fi_gets(fi, line, sizeof(line)-1))
-       return RECCTRL_EXTRACT_ERROR_GENERIC;
-    sscanf(line, "%255s", p->match_criteria);
-    
-    recWord.index_type = '0';
-    while (fi_gets(fi, line, sizeof(line)-1))
+        ret = RECCTRL_EXTRACT_EOF;
+    else
     {
-       int nor = 0;
-       char field[40];
-       char *cp;
+        sscanf(line, "%255s", p->match_criteria);
+        while (fi_gets(fi, line, sizeof(line)-1))
+        {
+            int nor = 0;
+            char field[40];
+            const char *cp = line;
+            char type_cstr[2];
 #if 0
-       yaz_log(YLOG_LOG, "safari line: %s", line);
+            yaz_log(YLOG_LOG, "safari line: %s", line);
 #endif
-       if (sscanf(line, ZINT_FORMAT " " ZINT_FORMAT " " ZINT_FORMAT " %39s %n",
-                  &recWord.record_id, &recWord.section_id, &recWord.seqno,
-                  field, &nor) < 4)
-       {
-           yaz_log(YLOG_WARN, "Bad safari record line: %s", line);
-           return RECCTRL_EXTRACT_ERROR_GENERIC;
-       }
-       for (cp = line + nor; *cp == ' '; cp++)
-           ;
-       recWord.index_name = field;
-       recWord.term_buf = cp;
-       recWord.term_len = strlen(cp);
-       (*p->tokenAdd)(&recWord);
+            type_cstr[1] = '\0';
+            if (*cp >= '0' && *cp <= '9')
+                type_cstr[0] = '0'; /* the default is 0 (raw) */
+            else
+                type_cstr[0] = *cp++; /* type given */
+            type_cstr[1] = '\0';
+
+            recWord.index_type = type_cstr;
+            if (tinfo->segments)
+            {
+                if (sscanf(cp, ZINT_FORMAT " " ZINT_FORMAT " " ZINT_FORMAT 
+                           ZINT_FORMAT " %39s %n",
+                           &recWord.record_id, &recWord.section_id, 
+                           &recWord.segment,
+                           &recWord.seqno,
+                           field, &nor) < 5)
+                {
+                    yaz_log(YLOG_WARN, "Bad safari record line: %s", line);
+                    ret = RECCTRL_EXTRACT_ERROR_GENERIC;
+                    break;
+                }
+            }
+            else
+            {
+                if (sscanf(cp, ZINT_FORMAT " " ZINT_FORMAT " " ZINT_FORMAT " %39s %n",
+                           &recWord.record_id, &recWord.section_id, &recWord.seqno,
+                           field, &nor) < 4)
+                {
+                    yaz_log(YLOG_WARN, "Bad safari record line: %s", line);
+                    ret = RECCTRL_EXTRACT_ERROR_GENERIC;
+                    break;
+                }
+            }
+            for (cp = cp + nor; *cp == ' '; cp++)
+                ;
+            recWord.index_name = field;
+            recWord.term_buf = cp;
+            recWord.term_len = strlen(cp);
+            (*p->tokenAdd)(&recWord);
+        }
     }
     fi_close(fi);
-    return RECCTRL_EXTRACT_OK;
+    return ret;
 }
 
 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
@@ -216,7 +246,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
         }
        if (!make_body)
            break;
-        r = (*p->readf)(p->fh, filter_buf + filter_ptr, 4096);
+        r = p->stream->readf(p->stream, filter_buf + filter_ptr, 4096);
         if (r <= 0)
             break;
         filter_ptr += r;
@@ -242,7 +272,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
             filter_ptr = p-filter_buf;
         }
     }
-    p->output_format = VAL_SUTRS;
+    p->output_format = yaz_oid_recsyn_sutrs;
     p->rec_buf = filter_buf;
     p->rec_len = filter_ptr; 
     return 0;
@@ -258,6 +288,16 @@ static struct recType filter_type = {
     filter_retrieve
 };
 
+static struct recType filter_type2 = {
+    0,
+    "safari2",
+    filter_init2,
+    filter_config,
+    filter_destroy,
+    filter_extract,
+    filter_retrieve
+};
+
 RecType
 #ifdef IDZEBRA_STATIC_SAFARI
 idzebra_filter_safari
@@ -267,6 +307,7 @@ idzebra_filter
 
 [] = {
     &filter_type,
+    &filter_type2,
     0,
 };
 /*