X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=84250b544f4b1215267245a15c14a19c456aacbf;hb=b09c1d0aa414cb43fb667be54a29b3cc7279ea85;hp=5c17db509cdae90e723bcde6c98a4314dc063428;hpb=5437b50633032595afe6f87dc0f989bc92a5aea8;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 5c17db5..84250b5 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,6 +1,6 @@ -/* $Id: extract.c,v 1.166 2004-11-19 10:26:56 heikki Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 - Index Data Aps +/* $Id: extract.c,v 1.172 2005-03-05 09:19:14 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS This file is part of the Zebra server. @@ -85,8 +85,11 @@ static void extract_init (struct recExtractCtrl *p, RecWord *w) w->seqno = 1; w->attrSet = VAL_BIB1; w->attrUse = 1016; + w->attrStr = 0; w->reg_type = 'w'; w->extractCtrl = p; + w->record_id = 0; + w->section_id = 0; } static const char **searchRecordKey (ZebraHandle zh, @@ -113,9 +116,9 @@ static const char **searchRecordKey (ZebraHandle zh, iscz1_decode(decode_handle, &dst, &src); assert(key.len < 4 && key.len > 2); - attrSet = (int) key.mem[0]; - attrUse = (int) key.mem[1]; - seqno = (int) key.mem[2]; + attrSet = (int) key.mem[0] >> 16; + attrUse = (int) key.mem[0] & 65535; + seqno = (int) key.mem[key.len-1]; if (attrUseS == attrUse && attrSetS == attrSet) { @@ -394,7 +397,7 @@ static int file_extract_record(ZebraHandle zh, { RecordAttr *recordAttr; int r; - const char *matchStr; + const char *matchStr = 0; SYSNO sysnotmp; Record rec; off_t recordOffset = 0; @@ -439,6 +442,7 @@ static int file_extract_record(ZebraHandle zh, extractCtrl.tokenAdd = extract_token_add; extractCtrl.schemaAdd = extract_schema_add; extractCtrl.dh = zh->reg->dh; + extractCtrl.match_criteria[0] = '\0'; extractCtrl.handle = zh; for (i = 0; i<256; i++) { @@ -499,36 +503,36 @@ static int file_extract_record(ZebraHandle zh, fname, recordOffset); return 1; } + if (extractCtrl.match_criteria[0]) + matchStr = extractCtrl.match_criteria; } /* perform match if sysno not known and if match criteria is specified */ - - matchStr = NULL; if (!sysno) { sysnotmp = 0; sysno = &sysnotmp; - if (zh->m_record_id && *zh->m_record_id) + + if (matchStr == 0 && zh->m_record_id && *zh->m_record_id) { - char *rinfo; matchStr = fileMatchStr (zh, &zh->reg->keys, fname, zh->m_record_id); - if (matchStr) - { - rinfo = dict_lookup (zh->reg->matchDict, matchStr); - if (rinfo) - { - assert(*rinfo == sizeof(*sysno)); - memcpy (sysno, rinfo+1, sizeof(*sysno)); - } - } - else - { - yaz_log (YLOG_WARN, "Bad match criteria"); - return 0; - } - } + if (!matchStr) + { + yaz_log(YLOG_WARN, "Bad match criteria"); + return 0; + } + } + if (matchStr) + { + char *rinfo = dict_lookup (zh->reg->matchDict, matchStr); + if (rinfo) + { + assert(*rinfo == sizeof(*sysno)); + memcpy (sysno, rinfo+1, sizeof(*sysno)); + } + } } if (! *sysno) @@ -884,6 +888,7 @@ int buffer_extract_record (ZebraHandle zh, extractCtrl.handle = zh; extractCtrl.zebra_maps = zh->reg->zebra_maps; extractCtrl.flagShowRecords = 0; + extractCtrl.match_criteria[0] = '\0'; for (i = 0; i<256; i++) { if (zebra_maps_is_positioned(zh->reg->zebra_maps, i)) @@ -921,6 +926,9 @@ int buffer_extract_record (ZebraHandle zh, /* match criteria */ matchStr = NULL; + if (extractCtrl.match_criteria[0]) + match_criteria = extractCtrl.match_criteria; + if (! *sysno) { char *rinfo; if (match_criteria && *match_criteria) { @@ -1159,6 +1167,7 @@ int explain_extract (void *handle, Record rec, data1_node *n) extractCtrl.seqno[i] = 0; extractCtrl.zebra_maps = zh->reg->zebra_maps; extractCtrl.flagShowRecords = 0; + extractCtrl.match_criteria[0] = '\0'; extractCtrl.handle = handle; if (n) @@ -1227,13 +1236,9 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, const char *src = reckeys->buf + off; struct it_key key; char *dst = (char*) &key; - int attrSet, attrUse; iscz1_decode(decode_handle, &dst, &src); - assert(key.len < 4 && key.len > 2); - - attrSet = (int) key.mem[0]; - attrUse = (int) key.mem[1]; /* sequence in mem[2] */ + assert(key.len == 4); if (zh->reg->key_buf_used + 1024 > (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*)) @@ -1243,11 +1248,8 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] = (char*)zh->reg->key_buf + zh->reg->key_buf_used; - ch = zebraExplain_lookupSU (zei, attrSet, attrUse); - if (ch < 0) - ch = zebraExplain_addSU (zei, attrSet, attrUse); + ch = key.mem[0]; /* ordinal for field/use/attribute */ - assert (ch > 0); zh->reg->key_buf_used += key_SU_encode (ch,((char*)zh->reg->key_buf) + zh->reg->key_buf_used); @@ -1257,10 +1259,14 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = '\0'; ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = cmd; - key.len = 2; - key.mem[0] = sysno; - key.mem[1] = key.mem[2]; /* sequence .. */ - + key.len = 3; + if (key.mem[1]) /* filter specified record ID */ + key.mem[0] = key.mem[1]; + else + key.mem[0] = sysno; + key.mem[1] = key.mem[2]; /* section_id */ + key.mem[2] = key.mem[3]; /* sequence .. */ + memcpy ((char*)zh->reg->key_buf + zh->reg->key_buf_used, &key, sizeof(key)); (zh->reg->key_buf_used) += sizeof(key); @@ -1404,19 +1410,17 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) zh->reg->key_buf_used = 0; } -void extract_add_index_string (RecWord *p, const char *str, int length) +void extract_add_it_key (ZebraHandle zh, + int reg_type, + const char *str, int slen, struct it_key *key) { char *dst; - ZebraHandle zh = p->extractCtrl->handle; struct recKeys *keys = &zh->reg->keys; - struct it_key key; - const char *src = (char*) &key; + const char *src = (char*) key; if (keys->buf_used+1024 > keys->buf_max) { - char *b; - - b = (char *) xmalloc (keys->buf_max += 128000); + char *b = (char *) xmalloc (keys->buf_max += 128000); if (keys->buf_used > 0) memcpy (b, keys->buf, keys->buf_used); xfree (keys->buf); @@ -1424,24 +1428,50 @@ void extract_add_index_string (RecWord *p, const char *str, int length) } dst = keys->buf + keys->buf_used; - key.len = 3; - key.mem[0] = p->attrSet; - key.mem[1] = p->attrUse; - key.mem[2] = p->seqno; + iscz1_encode(keys->codec_handle, &dst, &src); + + *dst++ = reg_type; + memcpy (dst, str, slen); + dst += slen; + *dst++ = '\0'; + keys->buf_used = dst - keys->buf; +} + +void extract_add_index_string (RecWord *p, const char *str, int length) +{ + struct it_key key; + + ZebraHandle zh = p->extractCtrl->handle; + ZebraExplainInfo zei = zh->reg->zei; + int ch; + + if (p->attrStr) + { + ch = zebraExplain_lookup_attr_str(zei, p->attrStr); + if (ch < 0) + ch = zebraExplain_add_attr_str(zei, p->attrStr); + } + else + { + ch = zebraExplain_lookup_attr_su(zei, p->attrSet, p->attrUse); + if (ch < 0) + ch = zebraExplain_add_attr_su(zei, p->attrSet, p->attrUse); + } + key.len = 4; + key.mem[0] = ch; + key.mem[1] = p->record_id; + key.mem[2] = p->section_id; + key.mem[3] = p->seqno; #if 0 /* just for debugging .. */ - yaz_log(YLOG_LOG, "set=%d use=%d seqno=%d", p->attrSet, p->attrUse, - p->seqno); + yaz_log(YLOG_LOG, "add: set=%d use=%d " + "record_id=%lld section_id=%lld seqno=%lld", + p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno); #endif - iscz1_encode(keys->codec_handle, &dst, &src); - - *dst++ = p->reg_type; - memcpy (dst, str, length); - dst += length; - *dst++ = '\0'; - keys->buf_used = dst - keys->buf; + extract_add_it_key(p->extractCtrl->handle, p->reg_type, str, + length, &key); } static void extract_add_sort_string (RecWord *p, const char *str, @@ -1492,9 +1522,11 @@ void extract_add_string (RecWord *p, const char *string, int length) static void extract_add_incomplete_field (RecWord *p) { - const char *b = p->string; - int remain = p->length; + const char *b = p->term_buf; + int remain = p->term_len; const char **map = 0; + + yaz_log(YLOG_DEBUG, "Incomplete field, w='%.*s'", p->term_len, p->term_buf); if (remain > 0) map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0); @@ -1507,7 +1539,7 @@ static void extract_add_incomplete_field (RecWord *p) /* Skip spaces */ while (map && *map && **map == *CHR_SPACE) { - remain = p->length - (b - p->string); + remain = p->term_len - (b - p->term_buf); if (remain > 0) map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0); else @@ -1522,7 +1554,7 @@ static void extract_add_incomplete_field (RecWord *p) while (i < IT_MAX_WORD && *cp) buf[i++] = *(cp++); - remain = p->length - (b - p->string); + remain = p->term_len - (b - p->term_buf); if (remain > 0) map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0); else @@ -1537,13 +1569,13 @@ static void extract_add_incomplete_field (RecWord *p) static void extract_add_complete_field (RecWord *p) { - const char *b = p->string; + const char *b = p->term_buf; char buf[IT_MAX_WORD+1]; const char **map = 0; - int i = 0, remain = p->length; - int first; /* first position */ + int i = 0, remain = p->term_len; -yaz_log(YLOG_DEBUG, "Complete field, w='%.*s'", p->length, p->string); + yaz_log(YLOG_DEBUG, "Complete field, w='%.*s'", + p->term_len, p->term_buf); if (remain > 0) map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain, 1); @@ -1552,11 +1584,11 @@ yaz_log(YLOG_DEBUG, "Complete field, w='%.*s'", p->length, p->string); { while (map && *map && **map == *CHR_SPACE) { - remain = p->length - (b - p->string); + remain = p->term_len - (b - p->term_buf); if (remain > 0) { - first = i ? 0 : 1; + int first = i ? 0 : 1; /* first position */ map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, first); } else @@ -1583,7 +1615,7 @@ yaz_log(YLOG_DEBUG, "Complete field, w='%.*s'", p->length, p->string); while (i < IT_MAX_WORD && *cp) buf[i++] = *(cp++); } - remain = p->length - (b - p->string); + remain = p->term_len - (b - p->term_buf); if (remain > 0) { map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, @@ -1608,10 +1640,10 @@ void extract_token_add (RecWord *p) p->string); #endif if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0, - p->string, p->length))) + p->term_buf, p->term_len))) { - p->string = wrbuf_buf(wrbuf); - p->length = wrbuf_len(wrbuf); + p->term_buf = wrbuf_buf(wrbuf); + p->term_len = wrbuf_len(wrbuf); } if (zebra_maps_is_complete (p->zebra_maps, p->reg_type)) extract_add_complete_field (p);