X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=7c325cafb2e346358c09cb45493b17c7973319f9;hb=c41c84a497ae744aa825a90f144c85b54f1cd4bb;hp=4309b60e2a99673b0ce50f5b6997affdda740926;hpb=ef696645cc3b7e0f4027008d1dc589c0f0f90c1f;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 4309b60..7c325ca 100644 --- a/index/extract.c +++ b/index/extract.c @@ -4,7 +4,21 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.96 1999-05-26 07:49:13 adam + * Revision 1.100 2000-03-20 19:08:36 adam + * Added remote record import using Z39.50 extended services and Segment + * Requests. + * + * Revision 1.99 2000/02/24 10:57:02 adam + * Sequence number incremented after each incomplete-field. + * + * Revision 1.98 1999/09/07 07:19:21 adam + * Work on character mapping. Implemented replace rules. + * + * Revision 1.97 1999/07/06 12:28:04 adam + * Updated record index structure. Format includes version ID. Compression + * algorithm ID is stored for each record block. + * + * Revision 1.96 1999/05/26 07:49:13 adam * C++ compilation. * * Revision 1.95 1999/05/21 12:00:17 adam @@ -406,6 +420,8 @@ int key_open (struct recordGroup *rGroup, int mem) BFiles bfs = rGroup->bfs; int rw = rGroup->flagRw; data1_handle dh = rGroup->dh; + char *recordCompression; + int record_compression = REC_COMPRESS_NONE; if (!mem) mem = atoi(res_get_def (common_resource, "memMax", "4"))*1024*1024; if (mem < 50000) @@ -423,7 +439,13 @@ int key_open (struct recordGroup *rGroup, int mem) return -1; } assert (!records); - records = rec_open (bfs, rw); + recordCompression = res_get_def (common_resource, + "recordCompression", "none"); + if (!strcmp (recordCompression, "none")) + record_compression = REC_COMPRESS_NONE; + if (!strcmp (recordCompression, "bzip2")) + record_compression = REC_COMPRESS_BZIP2; + records = rec_open (bfs, rw, record_compression); if (!records) { dict_close (matchDict); @@ -544,7 +566,7 @@ void key_flush (void) logf (LOG_LOG, "sorting section %d", key_file_no); #if !SORT_EXTRA qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_qsort_compare); - getFnameTmp (out_fname, key_file_no); + getFnameTmp (common_resource, out_fname, key_file_no); if (!(outf = fopen (out_fname, "wb"))) { @@ -619,7 +641,7 @@ int key_close (struct recordGroup *rGroup) int rw = rGroup->flagRw; if (rw) zebraExplain_runNumberIncrement (zti, 1); - zebraExplain_close (zti, rw, 0); + zebraExplain_close (zti, rw); key_flush (); xfree (key_buf); rec_close (&records); @@ -637,6 +659,7 @@ static void wordInit (struct recExtractCtrl *p, RecWord *w) w->attrSet = VAL_BIB1; w->attrUse = 1016; w->reg_type = 'w'; + w->extractCtrl = p; } static struct sortKey { @@ -794,6 +817,7 @@ static void addIncompleteField (RecWord *p) return; addString (p, buf, i); } + (p->seqnos[p->reg_type])++; /* to separate this from next one */ } static void addCompleteField (RecWord *p) @@ -844,6 +868,13 @@ static void addCompleteField (RecWord *p) static void addRecordKey (RecWord *p) { + WRBUF wrbuf; + if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0, + p->string, p->length))) + { + p->string = wrbuf_buf(wrbuf); + p->length = wrbuf_len(wrbuf); + } if (zebra_maps_is_complete (p->zebra_maps, p->reg_type)) addCompleteField (p); else @@ -1279,8 +1310,8 @@ static int recordExtract (SYSNO *sysno, const char *fname, extractCtrl.fh = fi; extractCtrl.subType = subType; extractCtrl.init = wordInit; - extractCtrl.addWord = addRecordKey; - extractCtrl.addSchema = addSchema; + extractCtrl.tokenAdd = addRecordKey; + extractCtrl.schemaAdd = addSchema; extractCtrl.dh = rGroup->dh; for (i = 0; i<256; i++) { @@ -1620,7 +1651,7 @@ int fileExtract (SYSNO *sysno, const char *fname, { if (zebraExplain_newDatabase (zti, rGroup->databaseName, rGroup->explainDatabase)) - abort (); + return 0; } if (rGroup->flagStoreData == -1) @@ -1654,59 +1685,6 @@ int fileExtract (SYSNO *sysno, const char *fname, if (rGroup->flagStoreKeys == -1) rGroup->flagStoreKeys = 0; -#if ZEBRASDR - if (rGroup->useSDR) - { - ZebraSdrHandle h; - char xname[128], *xp; - - strncpy (xname, fname, 127); - if (!(xp = strchr (xname, '.'))) - return 0; - *xp = '\0'; - if (strcmp (xp+1, "sdr.bits")) - return 0; - - h = zebraSdr_open (xname); - if (!h) - { - logf (LOG_WARN, "sdr open %s", xname); - return 0; - } - for (;;) - { - unsigned char *buf; - char sdr_name[128]; - int r, segmentno; - - segmentno = zebraSdr_segment (h, 0); - sprintf (sdr_name, "%%%s.%d", xname, segmentno); - -#if 0 - if (segmentno > 20) - break; -#endif - r = zebraSdr_read (h, &buf); - - if (!r) - break; - - fi = file_read_start (0); - fi->sdrbuf = buf; - fi->sdrmax = r; - do - { - file_begin (fi); - r = recordExtract (sysno, sdr_name, rGroup, deleteFlag, fi, - recType, subType); - } while (r && !sysno && fi->file_more); - file_read_stop (fi); - free (buf); - } - zebraSdr_close (h); - return 1; - } -#endif if (sysno && deleteFlag) fd = -1; else @@ -1748,8 +1726,8 @@ static int explain_extract (void *handle, Record rec, data1_node *n) reckeys.prevSeqNo = 0; extractCtrl.init = wordInit; - extractCtrl.addWord = addRecordKey; - extractCtrl.addSchema = addSchema; + extractCtrl.tokenAdd = addRecordKey; + extractCtrl.schemaAdd = addSchema; extractCtrl.dh = rGroup->dh; for (i = 0; i<256; i++) extractCtrl.seqno[i] = 0;