+ if (zebra_maps_is_icu(zm))
+ snippet_add_icu(p, ch, zm);
+ else
+ {
+ if (zebra_maps_is_complete(zm))
+ snippet_add_complete_field(p, ch, zm);
+ else
+ snippet_add_incomplete_field(p, ch, zm);
+ }
+ }
+}
+
+/* Schema callback for snippet extraction: deliberately does nothing.
+   extract_snippet() installs it as the schemaAdd member of the extract
+   control; both arguments are ignored. */
+static void snippet_schema_add(struct recExtractCtrl *p, Odr_oid *oid)
+{
+    (void) p;
+    (void) oid;
+}
+
+/** \brief runs a record type's extract handler with the snippet callbacks
+    \param zh Zebra handle; zh->reg and zh->reg->dh must be set
+    \param sn snippets object, made reachable through the control's handle
+    \param stream record stream stored in the extract control
+    \param rt record type whose extract handler is called
+    \param recTypeClientData client data passed to the extract handler
+ */
+void extract_snippet(ZebraHandle zh, zebra_snippets *sn,
+                     struct ZebraRecStream *stream,
+                     RecType rt, void *recTypeClientData)
+{
+    struct snip_rec_info snip;
+    struct recExtractCtrl ctrl;
+
+    assert(zh->reg);
+    assert(zh->reg->dh);
+
+    /* context handed to the callbacks via ctrl.handle */
+    snip.zh = zh;
+    snip.snippets = sn;
+
+    /* callbacks */
+    ctrl.init = extract_init;
+    ctrl.tokenAdd = snippet_token_add;
+    ctrl.schemaAdd = snippet_schema_add;
+
+    /* record source and per-record settings */
+    ctrl.stream = stream;
+    ctrl.dh = zh->reg->dh;
+    ctrl.handle = &snip;
+    ctrl.first_record = 1;
+    ctrl.staticrank = 0;
+    ctrl.match_criteria[0] = '\0';
+    ctrl.action = action_insert;
+
+    init_extractCtrl(zh, &ctrl);
+
+    /* assigned after init_extractCtrl(), so this is the value in effect
+       when the extract handler is called */
+    ctrl.setStoreData = 0;
+
+    (*rt->extract)(recTypeClientData, &ctrl);
+}
+
+/* Collects the key strings that a record holds for one named index.
+
+   All ws_length slots of ws are first set to NULL.  index_name is then
+   looked up with the index types "0", "p" and "w", in that order, and the
+   first non-negative result is used; if all lookups fail the function
+   returns with ws all NULL.  For every record key whose first component
+   equals that result, the key string is stored in ws at the offset of the
+   key's last component (its sequence number) from the sequence number of
+   the first such key.  Offsets outside 0..ws_length-1 are dropped.
+
+   The stored pointers are the ones produced by zebra_rec_keys_read();
+   NOTE(review): presumably they stay valid only as long as reckeys is
+   left untouched -- confirm. */
+static void searchRecordKey(ZebraHandle zh,
+                            zebra_rec_keys_t reckeys,
+                            const char *index_name,
+                            const char **ws, int ws_length)
+{
+    static const char *const index_types[] = { "0", "p", "w" };
+    const int no_types = (int) (sizeof(index_types) / sizeof(index_types[0]));
+    const zinfo_index_category_t cat = zinfo_index_category_index;
+    int idx_id = -1;
+    int k;
+    zint first_seq = -1;
+    const char *term;
+    size_t term_len;
+    struct it_key key;
+
+    for (k = 0; k < ws_length; k++)
+        ws[k] = NULL;
+
+    /* stop at the first index type that resolves */
+    for (k = 0; idx_id < 0 && k < no_types; k++)
+        idx_id = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
+                                              index_types[k], index_name);
+    if (idx_id < 0)
+        return;
+
+    if (!zebra_rec_keys_rewind(reckeys))
+        return;
+
+    while (zebra_rec_keys_read(reckeys, &term, &term_len, &key))
+    {
+        zint seqno;
+        zint slot;
+
+        assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
+
+        if (key.mem[0] != idx_id)
+            continue;
+
+        seqno = key.mem[key.len - 1];
+        if (first_seq == -1)
+            first_seq = seqno;
+
+        slot = seqno - first_seq;
+        if (slot >= 0 && slot < ws_length)
+            ws[slot] = term;
+    }
+}
+
+#define FILE_MATCH_BLANK "\t "
+
+/* Advances s past blanks (FILE_MATCH_BLANK) and returns the new position.
+   The explicit *s test matters: strchr() also matches the terminating
+   NUL, so a bare strchr() loop would walk past the end of the string. */
+static const char *match_spec_skip_blanks(const char *s)
+{
+    while (*s && strchr(FILE_MATCH_BLANK, *s))
+        s++;
+    return s;
+}
+
+/* Appends len bytes of src at *dst without ever writing at or beyond
+   dst_end.  Returns 1 on success and 0 (nothing written) if src does
+   not fit. */
+static int match_spec_append(char **dst, const char *dst_end,
+                             const char *src, size_t len)
+{
+    if (len > (size_t) (dst_end - *dst))
+        return 0;
+    memcpy(*dst, src, len);
+    *dst += len;
+    return 1;
+}
+
+/** \brief builds the match string for a record from a match specification
+    \param zh Zebra handle (m_group, basenames[0], m_record_type are read)
+    \param reckeys record keys searched for (..) elements
+    \param fname file name substituted for $filename and used in messages
+    \param spec match specification
+    \returns pointer to a static buffer holding the match string, or NULL
+
+    spec is a sequence of elements separated by blanks (tab, space):
+    - (name) or (set,name): a blank followed by the concatenated key
+      strings that searchRecordKey() finds for index "name"
+    - $group, $database, $filename, $type: the corresponding value;
+      other $-names, and values that are NULL, contribute nothing
+    - "text" or 'text': the literal text (at most 63 characters are kept)
+
+    Every element is followed by a byte with value 1.  NULL is returned,
+    after logging a warning, on a syntax error, when a (..) element has no
+    keys in the record, when spec has no elements, or when the result does
+    not fit in the buffer.
+
+    The result lives in a static buffer: it is overwritten by the next
+    call and the function is not reentrant.
+ */
+static char *get_match_from_spec(ZebraHandle zh,
+                                 zebra_rec_keys_t reckeys,
+                                 const char *fname, const char *spec)
+{
+    static char dstBuf[2048]; /* static here ??? */
+    /* the last byte stays reserved for the terminating NUL */
+    const char *dst_end = dstBuf + sizeof(dstBuf) - 1;
+    char *dst = dstBuf;
+    const char *s = spec;
+
+    while (1)
+    {
+        s = match_spec_skip_blanks(s);
+        if (!*s)
+            break;
+        if (*s == '(')
+        {
+            const char *ws[32];
+            const int ws_max = (int) (sizeof(ws) / sizeof(ws[0]));
+            char attset_str[64], attname_str[64];
+            size_t n;
+            int i;
+            int first = 1;
+
+            s = match_spec_skip_blanks(s + 1);
+            for (n = 0; *s && *s != ',' && *s != ')' &&
+                     !strchr(FILE_MATCH_BLANK, *s); s++)
+                if (n + 1 < sizeof(attset_str))
+                    attset_str[n++] = *s;
+            attset_str[n] = '\0';
+
+            s = match_spec_skip_blanks(s);
+            if (*s != ',')
+                strcpy(attname_str, attset_str); /* same size arrays */
+            else
+            {
+                s = match_spec_skip_blanks(s + 1);
+                for (n = 0; *s && *s != ')' &&
+                         !strchr(FILE_MATCH_BLANK, *s); s++)
+                    if (n + 1 < sizeof(attname_str))
+                        attname_str[n++] = *s;
+                attname_str[n] = '\0';
+            }
+            if (*s != ')')
+            {
+                yaz_log(YLOG_WARN, "Missing ) in match criteria %s in group %s",
+                        spec, zh->m_group ? zh->m_group : "none");
+                return NULL;
+            }
+            s++;
+
+            searchRecordKey(zh, reckeys, attname_str, ws, ws_max);
+            if (0) /* for debugging */
+            {
+                for (i = 0; i < ws_max; i++)
+                {
+                    if (ws[i])
+                    {
+                        WRBUF w = wrbuf_hex_str(ws[i]);
+                        yaz_log(YLOG_LOG, "ws[%d] = %s", i, wrbuf_cstr(w));
+                        wrbuf_destroy(w);
+                    }
+                }
+            }
+
+            for (i = 0; i < ws_max; i++)
+            {
+                if (!ws[i])
+                    continue;
+                if (first)
+                {
+                    if (!match_spec_append(&dst, dst_end, " ", 1))
+                        goto too_long;
+                    first = 0;
+                }
+                if (!match_spec_append(&dst, dst_end, ws[i], strlen(ws[i])))
+                    goto too_long;
+            }
+            if (first)
+            {
+                yaz_log(YLOG_WARN, "Record didn't contain match"
+                        " fields in (%s,%s)", attset_str, attname_str);
+                return NULL;
+            }
+        }
+        else if (*s == '$')
+        {
+            size_t spec_len;
+            char special[64];
+            const char *spec_src = NULL;
+            const char *s1 = ++s;
+
+            while (*s1 && !strchr(FILE_MATCH_BLANK, *s1))
+                s1++;
+
+            spec_len = (size_t) (s1 - s);
+            if (spec_len > sizeof(special) - 1)
+                spec_len = sizeof(special) - 1;
+            memcpy(special, s, spec_len);
+            special[spec_len] = '\0';
+            s = s1;
+
+            if (!strcmp(special, "group"))
+                spec_src = zh->m_group;
+            else if (!strcmp(special, "database"))
+                spec_src = zh->basenames[0];
+            else if (!strcmp(special, "filename"))
+                spec_src = fname;
+            else if (!strcmp(special, "type"))
+                spec_src = zh->m_record_type;
+
+            if (spec_src &&
+                !match_spec_append(&dst, dst_end, spec_src, strlen(spec_src)))
+                goto too_long;
+        }
+        else if (*s == '\"' || *s == '\'')
+        {
+            int stopMarker = *s++;
+            char tmpString[64];
+            size_t n = 0;
+
+            /* s advances on every iteration; only the copy depends on the
+               room left, so an over-long literal is truncated rather than
+               stalling the loop */
+            for (; *s && *s != stopMarker; s++)
+                if (n + 1 < sizeof(tmpString))
+                    tmpString[n++] = *s;
+            if (*s)
+                s++;
+            if (!match_spec_append(&dst, dst_end, tmpString, n))
+                goto too_long;
+        }
+        else
+        {
+            yaz_log(YLOG_WARN, "Syntax error in match criteria %s in group %s",
+                    spec, zh->m_group ? zh->m_group : "none");
+            return NULL;
+        }
+        /* element separator */
+        if (dst >= dst_end)
+            goto too_long;
+        *dst++ = 1;
+    }
+    if (dst == dstBuf)
+    {
+        yaz_log(YLOG_WARN, "No match criteria for record %s in group %s",
+                fname, zh->m_group ? zh->m_group : "none");
+        return NULL;
+    }
+    *dst = '\0';
+
+    if (0) /* for debugging */
+    {
+        WRBUF w = wrbuf_hex_str(dstBuf);
+        yaz_log(YLOG_LOG, "get_match_from_spec %s", wrbuf_cstr(w));
+        wrbuf_destroy(w);
+    }
+
+    return dstBuf;
+
+too_long:
+    yaz_log(YLOG_WARN, "Match value too long for criteria %s in group %s",
+            spec, zh->m_group ? zh->m_group : "none");
+    return NULL;
+}
+
+struct recordLogInfo { /* NOTE(review): not referenced in the visible part of this file; field meanings below are inferred from names */
+ const char *fname; /* presumably the name of the file being processed -- confirm */
+ int recordOffset; /* presumably the record's offset within that file -- confirm */
+ struct recordGroup *rGroup; /* struct recordGroup is declared elsewhere */
+};
+
+/** \brief registers the always-matches entry for a record
+    \param ctrl record control
+    \param record_id custom record ID
+    \param sysno system record ID
+
+    Two things happen here: the always-matches entry (index "_ALLRECORDS",
+    type "w", empty term) is added, and the entry's sequence number is set
+    to sysno so that a custom record ID, when one is defined, can be mapped
+    back to the system record ID.
+    See zebra_recid_to_sysno .
+ */
+static void all_matches_add(struct recExtractCtrl *ctrl, zint record_id,
+                            zint sysno)
+{
+    RecWord entry;
+
+    extract_init(ctrl, &entry);
+
+    entry.index_type = "w";
+    entry.index_name = "_ALLRECORDS";
+    entry.record_id = record_id;
+    /* seqno is borrowed as the link from _ALLRECORDS back to the record
+       database; it is needed when a custom RECORD was defined */
+    entry.seqno = sysno;
+
+    extract_add_index_string(&entry, zinfo_index_category_alwaysmatches,
+                             "", 0);
+}
+
+/* forward declaration; the definition is not in the visible part of this
+   file.  NOTE(review): presumably declared here so that code further down
+   can call it before its definition -- confirm */
+ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh,
+ struct ZebraRecStream *stream,
+ enum zebra_recctrl_action_t action,
+ const char *recordType,
+ zint *sysno,
+ const char *match_criteria,
+ const char *fname,
+ RecType recType,
+ void *recTypeClientData);
+
+
+ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname,
+ enum zebra_recctrl_action_t action)
+{
+ ZEBRA_RES r = ZEBRA_OK;
+ int i, fd;
+ char gprefix[128];
+ char ext[128];
+ char ext_res[128];
+ const char *original_record_type = 0;
+ RecType recType;
+ void *recTypeClientData;
+ struct ZebraRecStream stream, *streamp;
+
+ zebra_init_log_level();
+
+ if (!zh->m_group || !*zh->m_group)
+ *gprefix = '\0';