+ int sysno;
+ int i;
+ if (zh->service->active < 2)
+ return;
+ for (i = 0; i<segment->num_segmentRecords; i++)
+ {
+ Z_NamePlusRecord *npr = segment->segmentRecords[i];
+ const char *databaseName = npr->databaseName;
+
+ if (!databaseName)
+ databaseName = zh->admin_databaseName;
+ printf ("--------------%d--------------------\n", i);
+ if (npr->which == Z_NamePlusRecord_intermediateFragment)
+ {
+ Z_FragmentSyntax *fragment = npr->u.intermediateFragment;
+ if (fragment->which == Z_FragmentSyntax_notExternallyTagged)
+ {
+ Odr_oct *oct = fragment->u.notExternallyTagged;
+ printf ("%.*s", (oct->len > 100 ? 100 : oct->len) ,
+ oct->buf);
+
+ sysno = 0;
+ extract_rec_in_mem (zh, "grs.sgml",
+ oct->buf, oct->len,
+ databaseName,
+ 0 /* delete_flag */,
+ 0 /* test_mode */,
+ &sysno /* sysno */,
+ 1 /* store_keys */,
+ 1 /* store_data */,
+ 0 /* match criteria */);
+ }
+ }
+ }
+}
+
+void zebra_admin_create (ZebraHandle zh, const char *database)
+{
+ ZebraService zs = zh->service;
+ if (zebra_register_lock(zh, 1))
+ {
+ zh->errCode = 1019;
+ return;
+ }
+ /* announce database */
+ if (zebraExplain_newDatabase (zs->zei, database, 0 /* explainDatabase */))
+ {
+ zh->errCode = 224;
+ zh->errString = "Database already exist";
+ }
+ zebraExplain_flush (zh->service->zei, 1, zh);
+ extract_index (zh);
+ zebra_register_unlock(zh);
+}
+
+int zebra_string_norm (ZebraHandle zh, unsigned reg_id,
+ const char *input_str, int input_len,
+ char *output_str, int output_len)
+{
+ WRBUF wrbuf;
+ if (!zh->service->zebra_maps)
+ return -1;
+ wrbuf = zebra_replace(zh->service->zebra_maps, reg_id, "",
+ input_str, input_len);
+ if (!wrbuf)
+ return -2;
+ if (wrbuf_len(wrbuf) >= output_len)
+ return -3;
+ if (wrbuf_len(wrbuf))
+ memcpy (output_str, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
+ output_str[wrbuf_len(wrbuf)] = '\0';
+ return wrbuf_len(wrbuf);
+}
+
+static void extract_init (struct recExtractCtrl *p, RecWord *w)
+{
+ w->zebra_maps = p->zebra_maps;
+ w->seqnos = p->seqno;
+ w->attrSet = VAL_BIB1;
+ w->attrUse = 1016;
+ w->reg_type = 'w';
+ w->extractCtrl = p;
+}
+
+static void extract_add_index_string (RecWord *p, const char *string,
+ int length)
+{
+ char *dst;
+ unsigned char attrSet;
+ unsigned short attrUse;
+ int lead = 0;
+ int diff = 0;
+ int *pseqno = &p->seqnos[p->reg_type];
+ ZebraHandle zh = p->extractCtrl->handle;
+ ZebraExplainInfo zei = zh->service->zei;
+ struct recKeys *keys = &zh->keys;
+
+ if (keys->buf_used+1024 > keys->buf_max)
+ {
+ char *b;
+
+ b = (char *) xmalloc (keys->buf_max += 128000);
+ if (keys->buf_used > 0)
+ memcpy (b, keys->buf, keys->buf_used);
+ xfree (keys->buf);
+ keys->buf = b;
+ }
+ dst = keys->buf + keys->buf_used;
+
+ attrSet = p->attrSet;
+ if (keys->buf_used > 0 && keys->prevAttrSet == attrSet)
+ lead |= 1;
+ else
+ keys->prevAttrSet = attrSet;
+ attrUse = p->attrUse;
+ if (keys->buf_used > 0 && keys->prevAttrUse == attrUse)
+ lead |= 2;
+ else
+ keys->prevAttrUse = attrUse;
+#if 1
+ diff = 1 + *pseqno - keys->prevSeqNo;
+ if (diff >= 1 && diff <= 15)
+ lead |= (diff << 2);
+ else
+ diff = 0;
+#endif
+ keys->prevSeqNo = *pseqno;
+
+ *dst++ = lead;
+
+#if SU_SCHEME
+ if ((lead & 3) < 3)
+ {
+ int ch = zebraExplain_lookupSU (zei, attrSet, attrUse);
+ if (ch < 0)
+ {
+ ch = zebraExplain_addSU (zei, attrSet, attrUse);
+ yaz_log (LOG_LOG, "addSU set=%d use=%d SU=%d",
+ attrSet, attrUse, ch);
+ }
+ assert (ch > 0);
+ memcpy (dst, &ch, sizeof(ch));
+ dst += sizeof(ch);
+ }
+#else
+ if (!(lead & 1))
+ {
+ memcpy (dst, &attrSet, sizeof(attrSet));
+ dst += sizeof(attrSet);
+ }
+ if (!(lead & 2))
+ {
+ memcpy (dst, &attrUse, sizeof(attrUse));
+ dst += sizeof(attrUse);
+ }
+#endif
+ *dst++ = p->reg_type;
+ memcpy (dst, string, length);
+ dst += length;
+ *dst++ = '\0';
+
+ if (!diff)
+ {
+ memcpy (dst, pseqno, sizeof(*pseqno));
+ dst += sizeof(*pseqno);
+ }
+ keys->buf_used = dst - keys->buf;
+ if (*pseqno)
+ (*pseqno)++;
+}
+
+static void extract_add_sort_string (RecWord *p, const char *string,
+ int length)
+{
+ struct sortKey *sk;
+ ZebraHandle zh = p->extractCtrl->handle;
+ struct sortKey *sortKeys = zh->sortKeys;
+
+ for (sk = sortKeys; sk; sk = sk->next)
+ if (sk->attrSet == p->attrSet && sk->attrUse == p->attrUse)
+ return;
+
+ sk = (struct sortKey *) xmalloc (sizeof(*sk));
+ sk->next = sortKeys;
+ sortKeys = sk;
+
+ sk->string = (char *) xmalloc (length);
+ sk->length = length;
+ memcpy (sk->string, string, length);
+
+ sk->attrSet = p->attrSet;
+ sk->attrUse = p->attrUse;
+}
+
+static void extract_add_string (RecWord *p, const char *string, int length)
+{
+ assert (length > 0);
+ if (zebra_maps_is_sort (p->zebra_maps, p->reg_type))
+ extract_add_sort_string (p, string, length);
+ else
+ extract_add_index_string (p, string, length);
+}
+
+static void extract_add_incomplete_field (RecWord *p)
+{
+ const char *b = p->string;
+ int remain = p->length;
+ const char **map = 0;
+
+ if (remain > 0)
+ map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
+
+ while (map)
+ {
+ char buf[IT_MAX_WORD+1];
+ int i, remain;
+
+ /* Skip spaces */
+ while (map && *map && **map == *CHR_SPACE)
+ {
+ remain = p->length - (b - p->string);
+ if (remain > 0)
+ map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
+ else
+ map = 0;
+ }
+ if (!map)
+ break;
+ i = 0;
+ while (map && *map && **map != *CHR_SPACE)
+ {
+ const char *cp = *map;
+
+ while (i < IT_MAX_WORD && *cp)
+ buf[i++] = *(cp++);
+ remain = p->length - (b - p->string);
+ if (remain > 0)
+ map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
+ else
+ map = 0;
+ }
+ if (!i)
+ return;
+ extract_add_string (p, buf, i);
+ }
+ (p->seqnos[p->reg_type])++; /* to separate this from next one */
+}
+
+static void extract_add_complete_field (RecWord *p)
+{
+ const char *b = p->string;
+ char buf[IT_MAX_WORD+1];
+ const char **map = 0;
+ int i = 0, remain = p->length;
+
+ if (remain > 0)
+ map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain);
+
+ while (remain > 0 && i < IT_MAX_WORD)
+ {
+ while (map && *map && **map == *CHR_SPACE)
+ {
+ remain = p->length - (b - p->string);
+ if (remain > 0)
+ map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
+ else
+ map = 0;
+ }
+ if (!map)
+ break;
+
+ if (i && i < IT_MAX_WORD)
+ buf[i++] = *CHR_SPACE;
+ while (map && *map && **map != *CHR_SPACE)
+ {
+ const char *cp = *map;
+
+ if (i >= IT_MAX_WORD)
+ break;
+ while (i < IT_MAX_WORD && *cp)
+ buf[i++] = *(cp++);
+ remain = p->length - (b - p->string);
+ if (remain > 0)
+ map = zebra_maps_input (p->zebra_maps, p->reg_type, &b,
+ remain);
+ else
+ map = 0;
+ }
+ }
+ if (!i)
+ return;
+ extract_add_string (p, buf, i);
+}
+
+static void extract_token_add (RecWord *p)
+{
+ WRBUF wrbuf;
+ if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0,
+ p->string, p->length)))
+ {
+ p->string = wrbuf_buf(wrbuf);
+ p->length = wrbuf_len(wrbuf);
+ }
+ if (zebra_maps_is_complete (p->zebra_maps, p->reg_type))
+ extract_add_complete_field (p);
+ else
+ extract_add_incomplete_field(p);
+}
+
+static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid)
+{
+ ZebraHandle zh = (ZebraHandle) (p->handle);
+ zebraExplain_addSchema (zh->service->zei, oid);
+}
+
+static void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
+ int cmd, struct sortKey **skp)
+{
+ struct sortKey *sk = *skp;
+ SortIdx sortIdx = zh->service->sortIdx;
+
+ sortIdx_sysno (sortIdx, sysno);
+ while (sk)
+ {
+ struct sortKey *sk_next = sk->next;
+ sortIdx_type (sortIdx, sk->attrUse);
+ sortIdx_add (sortIdx, sk->string, sk->length);
+ xfree (sk->string);
+ xfree (sk);
+ sk = sk_next;
+ }
+ *skp = 0;
+}
+
/*
 * Running state for encode_key_write: the values of the previously
 * written key plus a scratch buffer in which each record is assembled.
 */
struct encode_info
{
    int sysno;      /* sysno of the last key written (0 after init) */
    int seqno;      /* seqno of the last key written (0 after init) */
    int cmd;        /* command byte of the last key written (-1 after init) */
    char buf[768];  /* assembly area for one encoded record */
};
+
+void encode_key_init (struct encode_info *i)
+{
+ i->sysno = 0;
+ i->seqno = 0;
+ i->cmd = -1;
+}
+
/*
 * Write d at bp using 1 to 4 bytes and return the position just past the
 * last byte written.
 *
 *   d <= 63        1 byte :  d
 *   d <= 16383     2 bytes:  64 + (d>>8),   low byte
 *   d <= 4194303   3 bytes:  128 + (d>>16), then 2 lower bytes
 *   otherwise      4 bytes:  192 + (d>>24), then 3 lower bytes
 *
 * The first byte thus carries (number of extra bytes) * 64 plus the most
 * significant part of d; the remaining bytes follow most significant
 * first.
 */
char *encode_key_int (int d, char *bp)
{
    int extra;  /* bytes that follow the first one */
    int k;

    if (d <= 63)
    {
        *bp++ = d;
        return bp;
    }

    if (d <= 16383)
        extra = 1;
    else if (d <= 4194303)
        extra = 2;
    else
        extra = 3;

    *bp++ = 64 * extra + (d >> (8 * extra));
    for (k = extra - 1; k >= 0; k--)
        *bp++ = (d >> (8 * k)) & 255;

    return bp;
}
+
+void encode_key_write (char *k, struct encode_info *i, FILE *outf)
+{
+ struct it_key key;
+ char *bp = i->buf;
+
+ while ((*bp++ = *k++))
+ ;
+ memcpy (&key, k+1, sizeof(struct it_key));
+ bp = encode_key_int ( (key.sysno - i->sysno) * 2 + *k, bp);
+ if (i->sysno != key.sysno)
+ {
+ i->sysno = key.sysno;
+ i->seqno = 0;
+ }
+ else if (!i->seqno && !key.seqno && i->cmd == *k)
+ return;
+ bp = encode_key_int (key.seqno - i->seqno, bp);
+ i->seqno = key.seqno;
+ i->cmd = *k;
+ if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
+ {
+ logf (LOG_FATAL|LOG_ERRNO, "fwrite");
+ exit (1);
+ }
+}
+
+static void extract_flushWriteKeys (ZebraHandle zh)
+{
+ FILE *outf;
+ char out_fname[200];
+ char *prevcp, *cp;
+ struct encode_info encode_info;
+ int ptr_i = zh->ptr_i;
+#if SORT_EXTRA
+ int i;
+#endif
+ if (!zh->key_buf || ptr_i <= 0)
+ return;
+
+ (zh->key_file_no)++;
+ logf (LOG_LOG, "sorting section %d", (zh->key_file_no));
+#if !SORT_EXTRA
+ qsort (zh->key_buf + zh->ptr_top - ptr_i, ptr_i, sizeof(char*),
+ key_qsort_compare);
+ extract_get_fname_tmp (zh, out_fname, zh->key_file_no);